stdlib/strings: Access iterator through its position constructor.
[nit.git] / lib / standard / string.nit
index 647eacd..524d4d4 100644 (file)
@@ -5,17 +5,17 @@
 #
 # This file is free software, which comes along with NIT.  This software is
 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
-# without  even  the implied warranty of  MERCHANTABILITY or  FITNESS FOR A 
+# without  even  the implied warranty of  MERCHANTABILITY or  FITNESS FOR A
 # PARTICULAR PURPOSE.  You can modify it is you want,  provided this header
 # is kept unaltered, and a notification of the changes is added.
 # You  are  allowed  to  redistribute it and sell it, alone or is a part of
 # another product.
 
 # Basic manipulations of strings of characters
-package string
+module string
 
+import math
 intrude import collection # FIXME should be collection::array
-import hash
 
 `{
 #include <stdio.h>
@@ -31,6 +31,8 @@ abstract class AbstractString
 
        readable private var _items: NativeString
 
+       # View, as a `StringCharView`, on the characters of `self`.
+       # Abstract: each concrete kind of string provides its own view.
+       fun chars: StringCharView is abstract
+
        # Access a character at `index` in the string.
        #
        #     assert "abcd"[2]         == 'c'
@@ -38,10 +40,10 @@ abstract class AbstractString
 
        # Create a substring.
        #
-       #     assert "abcd".substring(1, 2)         ==  "bc"
-       #     assert "abcd".substring(-1, )         ==  "a"
-       #     assert "abcd".substring(1, 0)         ==  ""
-       #     assert "abcd".substring(2, 5)         ==  "cd"
+       #     assert "abcd".substring(1, 2)      ==  "bc"
+       #     assert "abcd".substring(-1, 2)     ==  "a"
+       #     assert "abcd".substring(1, 0)      ==  ""
+       #     assert "abcd".substring(2, 5)      ==  "cd"
        #
        # A `from` index < 0 will be replaced by 0.
        # Unless a `count` value is > 0 at the same time.
@@ -55,7 +57,7 @@ abstract class AbstractString
                if from < count then
                        var r = new Buffer.with_capacity(count - from)
                        while from < count do
-                               r.push(_items[from])
+                               r.chars.push(_items[from])
                                from += 1
                        end
                        return r.to_s
@@ -66,9 +68,9 @@ abstract class AbstractString
 
        # Create a substring from `self` beginning at the `from` position
        #
-       #     assert "abcd".substring_from(1)        ==  "bcd"
-       #     assert "abcd".substring_from(-1)       ==  "abcd"
-       #     assert "abcd".substring_from(2)       ==  "cd"
+       #     assert "abcd".substring_from(1)    ==  "bcd"
+       #     assert "abcd".substring_from(-1)   ==  "abcd"
+       #     assert "abcd".substring_from(2)    ==  "cd"
        #
        # As with substring, a `from` index < 0 will be replaced by 0
        fun substring_from(from: Int): String
@@ -100,8 +102,9 @@ abstract class AbstractString
 
        # Is this string prefixed by `prefix`?
        #
-       #     assert "abcd".has_prefix("ab")         ==  true
-       #     assert "abcbc".has_prefix("bc")        ==  false
+       #     assert "abcd".has_prefix("ab")           ==  true
+       #     assert "abcbc".has_prefix("bc")          ==  false
+       #     assert "ab".has_prefix("abcd")           ==  false
        fun has_prefix(prefix: String): Bool do return has_substring(prefix,0)
 
        # Is this string suffixed by `suffix`?
@@ -124,7 +127,7 @@ abstract class AbstractString
        #
        #     assert "123".to_f        == 123.0
        #     assert "-1".to_f         == -1.0
-       #     assert "-1.2e-3".to_f    == -1.2e-3
+       #     assert "-1.2e-3".to_f    == -0.0012
        fun to_f: Float
        do
                # Shortcut
@@ -142,7 +145,7 @@ abstract class AbstractString
                var i = 0
                var neg = false
 
-               for c in self
+               for c in self.chars
                do
                        var v = c.to_i
                        if v > base then
@@ -173,7 +176,7 @@ abstract class AbstractString
        fun is_numeric: Bool
        do
                var has_point_or_comma = false
-               for i in self
+               for i in self.chars
                do
                        if not i.is_numeric
                        then
@@ -194,7 +197,7 @@ abstract class AbstractString
        fun to_upper: String
        do
                var s = new Buffer.with_capacity(length)
-               for i in self do s.add(i.to_upper)
+               for i in self.chars do s.add(i.to_upper)
                return s.to_s
        end
 
@@ -204,7 +207,7 @@ abstract class AbstractString
        fun to_lower : String
        do
                var s = new Buffer.with_capacity(length)
-               for i in self do s.add(i.to_lower)
+               for i in self.chars do s.add(i.to_lower)
                return s.to_s
        end
 
@@ -215,18 +218,18 @@ abstract class AbstractString
        #     assert "\na\nb\tc\t".trim          == "a\nb\tc"
        fun trim: String
        do
-               if self._length == 0 then return self.to_s
+               if self.length == 0 then return self.to_s
                # find position of the first non white space char (white space being ascii <= 32) from the start of the string
                var start_pos = 0
-               while self[start_pos].ascii <= 32 do
+               while self.chars[start_pos].ascii <= 32 do
                        start_pos += 1
-                       if start_pos == _length then return ""
+                       if start_pos == length then return ""
                end
                # find position of the first non white space char from the end of the string
                var end_pos = length - 1
-               while self[end_pos].ascii <= 32 do
+               while self.chars[end_pos].ascii <= 32 do
                        end_pos -= 1
-                       if end_pos == start_pos then return self[start_pos].to_s
+                       if end_pos == start_pos then return self.chars[start_pos].to_s
                end
                return self.substring(start_pos, end_pos - start_pos + 1)
        end
@@ -245,7 +248,7 @@ abstract class AbstractString
        do
                var res = new Buffer
                var underscore = false
-               for c in self do
+               for c in self.chars do
                        if (c >= 'a' and c <= 'z') or (c >='A' and c <= 'Z') then
                                res.add(c)
                                underscore = false
@@ -278,7 +281,7 @@ abstract class AbstractString
        fun escape_to_c: String
        do
                var b = new Buffer
-               for c in self do
+               for c in self.chars do
                        if c == '\n' then
                                b.append("\\n")
                        else if c == '\0' then
@@ -297,6 +300,106 @@ abstract class AbstractString
                end
                return b.to_s
        end
+
+       # Escape additional characters.
+       # The result might not be legal in C but can be used in other languages.
+       #
+       # The string is first escaped with `escape_to_c`; then every character
+       # of that result that also appears in `chars` is prefixed with a backslash.
+       #
+       #     assert "ab|\{\}".escape_more_to_c("|\{\}") == "ab\\|\\\{\\\}"
+       fun escape_more_to_c(chars: String): String
+       do
+               var b = new Buffer
+               # Iterate and search through the character views, as the other
+               # loops of this class do.
+               for c in escape_to_c.chars do
+                       if chars.chars.has(c) then
+                               b.add('\\')
+                       end
+                       b.add(c)
+               end
+               return b.to_s
+       end
+
+       # Escape as `escape_to_c` does, and escape the curly braces as well
+       #
+       #     assert "\n\"'\\\{\}".escape_to_nit      == "\\n\\\"\\'\\\\\\\{\\\}"
+       fun escape_to_nit: String
+       do
+               return escape_more_to_c("\{\}")
+       end
+
+       # Return a string where Nit escape sequences are transformed.
+       #
+       # A backslash followed by `n`, `r`, `t` or `0` gives the matching
+       # control character; a backslash followed by any other character gives
+       # that character alone. A single backslash ending the string is dropped.
+       #
+       # Example:
+       #     var s = "\\n"
+       #     assert s.length        ==  2
+       #     var u = s.unescape_nit
+       #     assert u.length        ==  1
+       #     assert u[0].ascii      ==  10 # (the ASCII value of the "new line" character)
+       fun unescape_nit: String
+       do
+               var res = new Buffer.with_capacity(self.length)
+               # True when the previous character was a backslash that still
+               # waits for the character it escapes
+               var was_slash = false
+               # Go through the character view, as the other loops of this class do
+               for c in self.chars do
+                       if not was_slash then
+                               if c == '\\' then
+                                       was_slash = true
+                               else
+                                       res.add(c)
+                               end
+                               continue
+                       end
+                       # `c` is the character escaped by the previous backslash
+                       was_slash = false
+                       if c == 'n' then
+                               res.add('\n')
+                       else if c == 'r' then
+                               res.add('\r')
+                       else if c == 't' then
+                               res.add('\t')
+                       else if c == '0' then
+                               res.add('\0')
+                       else
+                               res.add(c)
+                       end
+               end
+               return res.to_s
+       end
+end
+
+# Abstract class of the views, compatible with `SequenceRead`,
+# on the characters of String and Buffer objects
+abstract class StringCharView
+       super SequenceRead[Char]
+
+       # Kind of string the view gives access to
+       type SELFTYPE: AbstractString
+
+       # String whose characters are exposed by the view
+       private var target: SELFTYPE
+
+       private init(tgt: SELFTYPE)
+       do
+               target = tgt
+       end
+
+       redef fun length do return target.length
+
+       redef fun is_empty do return target.is_empty
+
+       # Iterator over all the characters, starting at position 0
+       redef fun iterator: IndexedIterator[Char]
+       do
+               return self.iterator_from(0)
+       end
+
+       # Iterator over the characters, starting at position `pos`
+       fun iterator_from(pos: Int): IndexedIterator[Char] is abstract
+
+       # Is `c` one of the characters of the view? (linear search)
+       redef fun has(c: Char): Bool
+       do
+               var it = iterator
+               while it.is_ok do
+                       if it.item == c then return true
+                       it.next
+               end
+               return false
+       end
+
+end
+
+# View on the characters of Buffer objects.
+# On top of `StringCharView`, it is also a `Sequence[Char]`,
+# which brings the mutation operations.
+abstract class BufferCharView
+       super StringCharView
+       super Sequence[Char]
+
+       redef type SELFTYPE: Buffer
+
+end
 
 # Immutable strings of characters.
@@ -313,6 +416,8 @@ class String
        # Index in _items of the last item of the string
        readable var _index_to: Int
 
+       # View on the characters of the string, created as a `FlatStringCharView` on `self`
+       redef var chars: StringCharView = new FlatStringCharView(self)
+
        ################################################
        #       AbstractString specific methods        #
        ################################################
@@ -337,11 +442,13 @@ class String
 
                var realFrom = _index_from + from
 
-               if (realFrom + count) > _index_to then return new String.from_substring(realFrom, _index_to, _items)
+               if (realFrom + count) > _index_to then return new String.with_infos(_items, _index_to - realFrom + 1, realFrom, _index_to)
 
                if count == 0 then return ""
 
-               return new String.from_substring(realFrom, realFrom + count - 1, _items)
+               var to = realFrom + count - 1
+
+               return new String.with_infos(_items, to - realFrom + 1, realFrom, to)
        end
 
        redef fun substring_from(from: Int): String
@@ -392,7 +499,7 @@ class String
 
                outstr[self.length] = '\0'
 
-               return new String.with_native(outstr, self._length)
+               return outstr.to_s_with_length(self._length)
        end
 
        redef fun to_lower : String
@@ -412,7 +519,7 @@ class String
 
                outstr[self.length] = '\0'
 
-               return new String.with_native(outstr, self._length)
+               return outstr.to_s_with_length(self._length)
        end
 
        redef fun trim: String
@@ -449,53 +556,17 @@ class String
        #              String Specific Methods           #
        ##################################################
 
-       # Creates a String object as a substring of another String
-       #
-       # From : index to start at
-       #
-       # To : Index to stop at (from + count -1)
-       #
-       private init from_substring(from: Int, to: Int, internalString: NativeString)
+       private init with_infos(items: NativeString, len: Int, from: Int, to: Int)
        do
-               _items = internalString
+               self._items = items
+               _length = len
                _index_from = from
                _index_to = to
-               _length = to - from + 1
-       end
-
-       # Create a new string from a given char *.
-       init with_native(nat: NativeString, size: Int)
-       do
-               assert size >= 0
-               _items = nat
-               _length = size
-               _index_from = 0
-               _index_to = _length - 1
-       end
-
-       # Create a new string from a null terminated char *.
-       init from_cstring(str: NativeString)
-       do
-               with_native(str,str.cstring_length)
-       end
-
-       # Creates a new Nit String from an existing CString
-       # Pretty much equals to from_cstring but copies instead
-       # of passing a reference
-       # Avoids manual/automatic dealloc problems when dealing with native C code
-       init copy_from_native(str: NativeString)
-       do
-               var temp_length = str.cstring_length
-               var new_str = calloc_string(temp_length + 1)
-               str.copy_to(new_str, temp_length, 0, 0)
-               new_str[temp_length] = '\0'
-               with_native(new_str, temp_length)
        end
 
        # Return a null terminated char *
        fun to_cstring: NativeString
        do
-               #return items
                if _index_from > 0 or _index_to != items.cstring_length - 1 then
                        var newItems = calloc_string(_length + 1)
                        self.items.copy_to(newItems, _length, _index_from, 0)
@@ -507,7 +578,7 @@ class String
 
        redef fun ==(other)
        do
-               if not other isa String or other is null then return false
+               if not other isa String then return false
 
                if self.object_id == other.object_id then return true
 
@@ -534,7 +605,7 @@ class String
 
        # The comparison between two strings is done on a lexicographical basis
        #
-       #     assert "aa" < "b"      ==  true
+       #     assert ("aa" < "b")      ==  true
        redef fun <(other)
        do
                if self.object_id == other.object_id then return false
@@ -577,14 +648,16 @@ class String
                var my_length = self._length
                var its_length = s._length
 
+               var total_length = my_length + its_length
+
                var target_string = calloc_string(my_length + its_length + 1)
 
                self._items.copy_to(target_string, my_length, _index_from, 0)
                s._items.copy_to(target_string, its_length, s._index_from, my_length)
 
-               target_string[my_length + its_length] = '\0'
+               target_string[total_length] = '\0'
 
-               return new String.with_native(target_string, my_length + its_length)
+               return target_string.to_s_with_length(total_length)
        end
 
        # `i` repetitions of `self`
@@ -613,7 +686,7 @@ class String
                        current_last += my_length
                end
 
-               return new String.with_native(target_string, final_length)
+               return target_string.to_s_with_length(final_length)
        end
 
        redef fun to_s do return self
@@ -638,6 +711,50 @@ class String
        end
 end
 
+# Iterator over the characters of a `String`, reading its native items directly
+private class FlatStringIterator
+       super IndexedIterator[Char]
+
+       # String being iterated
+       var target: String
+
+       # Native items of `target`
+       var target_items: NativeString
+
+       # Current position, expressed as an index in `target_items`
+       var curr_pos: Int
+
+       # Start the iteration at position `pos` of `tgt`
+       init with_pos(tgt: String, pos: Int)
+       do
+               target = tgt
+               target_items = tgt.items
+               curr_pos = tgt.index_from + pos
+       end
+
+       # Position in the string (not in the native items)
+       redef fun index
+       do
+               return curr_pos - target.index_from
+       end
+
+       redef fun is_ok
+       do
+               return curr_pos <= target.index_to
+       end
+
+       redef fun item
+       do
+               return target_items[curr_pos]
+       end
+
+       redef fun next
+       do
+               curr_pos += 1
+       end
+
+end
+
+# View on the characters of a `String`
+private class FlatStringCharView
+       super StringCharView
+
+       redef type SELFTYPE: String
+
+       redef fun [](index)
+       do
+               # A valid index is not negative and, once shifted by `_index_from`,
+               # does not go past `_index_to`
+               assert index >= 0
+               var real_index = index + target._index_from
+               assert real_index <= target._index_to
+               return target._items[real_index]
+       end
+
+       redef fun iterator_from(start)
+       do
+               return new FlatStringIterator.with_pos(target, start)
+       end
+
+end
+
 # Mutable strings of characters.
 class Buffer
        super AbstractString
@@ -647,6 +764,8 @@ class Buffer
 
        redef type OTHER: String
 
+       # View on the characters of the buffer, created as a `FlatBufferCharView` on `self`
+       redef var chars: BufferCharView = new FlatBufferCharView(self)
+
        redef fun []=(index, item)
        do
                if index == length then
@@ -696,7 +815,7 @@ class Buffer
                # Ensure the afterlast byte is '\0' to nul-terminated char *
                a[length] = '\0'
 
-               return new String.with_native(a, length)
+               return a.to_s_with_length(length)
        end
 
        redef fun <(s)
@@ -705,8 +824,8 @@ class Buffer
                var l1 = length
                var l2 = s.length
                while i < l1 and i < l2 do
-                       var c1 = self[i].ascii
-                       var c2 = s[i].ascii
+                       var c1 = self.chars[i].ascii
+                       var c2 = s.chars[i].ascii
                        if c1 < c2 then
                                return true
                        else if c2 < c1 then
@@ -747,7 +866,7 @@ class Buffer
 
        redef fun ==(o)
        do
-               if not o isa Buffer or o is null then return false
+               if not o isa Buffer then return false
                var l = length
                if o.length != l then return false
                var i = 0
@@ -763,6 +882,76 @@ class Buffer
        readable private var _capacity: Int
 end
 
+# View on the characters of a `Buffer`, with the mutation services of `Sequence`
+private class FlatBufferCharView
+       super BufferCharView
+       super StringCapable
+
+       redef type SELFTYPE: Buffer
+
+       redef fun [](index) do return target._items[index]
+
+       # Replace the character at `index` with `item`.
+       # Writing at `index == length` adds `item` at the end instead.
+       redef fun []=(index, item)
+       do
+               assert index >= 0 and index <= length
+               if index == length then
+                       add(item)
+                       return
+               end
+               target._items[index] = item
+       end
+
+       redef fun push(c)
+       do
+               target.add(c)
+       end
+
+       redef fun add(c)
+       do
+               target.add(c)
+       end
+
+       # Forward the request to `enlarge` of the viewed buffer
+       fun enlarge(cap: Int)
+       do
+               target.enlarge(cap)
+       end
+
+       # Add all the characters of `s` at the end of the viewed buffer
+       redef fun append(s)
+       do
+               # Reserve once the room needed by the whole result: the capacity
+               # must be compared with the final length, not with `s.length` alone.
+               var new_length = target.length + s.length
+               if target.capacity < new_length then enlarge(new_length)
+               # Then actually add the characters, one by one
+               for c in s do target.add(c)
+       end
+
+       redef fun iterator_from(pos) do return new FlatBufferIterator.with_pos(target, pos)
+
+end
+
+# Iterator over the characters of a `Buffer`, reading its native items directly
+private class FlatBufferIterator
+       super IndexedIterator[Char]
+
+       # Buffer being iterated
+       var target: Buffer
+
+       # Native items of `target`, as they were when the iterator was created
+       var target_items: NativeString
+
+       # Current position in the buffer
+       var curr_pos: Int
+
+       # Start the iteration at position `pos` of `tgt`
+       init with_pos(tgt: Buffer, pos: Int)
+       do
+               target = tgt
+               target_items = tgt.items
+               curr_pos = pos
+       end
+
+       redef fun is_ok
+       do
+               return curr_pos < target.length
+       end
+
+       redef fun item
+       do
+               return target_items[curr_pos]
+       end
+
+       redef fun index
+       do
+               return curr_pos
+       end
+
+       redef fun next
+       do
+               curr_pos += 1
+       end
+
+end
+
 ###############################################################################
 # Refinement                                                                  #
 ###############################################################################
@@ -777,7 +966,7 @@ redef class Object
        # The class name of the object.
        #
        #    assert 5.class_name == "Int"
-       fun class_name: String do return new String.from_cstring(native_class_name)
+       fun class_name: String do return native_class_name.to_s
 
        # Developer readable representation of `self`.
        # Usually, it uses the form "<CLASSNAME:#OBJECTID bla bla bla>"
@@ -803,13 +992,13 @@ redef class Bool
        #     assert true.to_s         == "true"
        #     assert false.to_s        == "false"
        redef fun to_s
-       do 
-               if self then 
-                       return once "true" 
-               else 
-                       return once "false" 
+       do
+               if self then
+                       return once "true"
+               else
+                       return once "false"
                end
-       end   
+       end
 end
 
 redef class Int
@@ -821,17 +1010,17 @@ redef class Int
                # Sign
                if self < 0 then
                        n = - self
-                       s[0] = '-'
+                       s.chars[0] = '-'
                else if self == 0 then
-                       s[0] = '0'
+                       s.chars[0] = '0'
                        return
                else
                        n = self
                end
                # Fill digits
                var pos = digit_count(base) - 1
-               while pos >= 0 and n > 0 do 
-                       s[pos] = (n % base).to_c
+               while pos >= 0 and n > 0 do
+                       s.chars[pos] = (n % base).to_c
                        n = n / base # /
                        pos -= 1
                end
@@ -846,7 +1035,7 @@ redef class Int
        #     assert (-123).to_s       == "-123"
        redef fun to_s do
                var len = digit_count(10)
-               return new String.from_cstring(native_int_to_s(len))
+               return native_int_to_s(len).to_s_with_length(len)
        end
 
        # return displayable int in hexadecimal (unsigned (not now))
@@ -866,10 +1055,11 @@ redef class Float
        # Pretty print self, print needed decimals up to a max of 3.
        redef fun to_s do
                var str = to_precision( 3 )
+               if is_inf != 0 or is_nan then return str
                var len = str.length
                for i in [0..len-1] do
                        var j = len-1-i
-                       var c = str[j]
+                       var c = str.chars[j]
                        if c == '0' then
                                continue
                        else if c == '.' then
@@ -884,6 +1074,15 @@ redef class Float
        # `self` representation with `nb` digits after the '.'.
        fun to_precision(nb: Int): String
        do
+               if is_nan then return "nan"
+
+               var isinf = self.is_inf
+               if isinf == 1 then
+                       return "inf"
+               else if isinf == -1 then
+                       return  "-inf"
+               end
+
                if nb == 0 then return self.to_i.to_s
                var f = self
                for i in [0..nb[ do f = f * 10.0
@@ -905,7 +1104,7 @@ redef class Float
                end
        end
 
-       fun to_precision_native(nb: Int): String import String::from_cstring `{
+       fun to_precision_native(nb: Int): String import NativeString.to_s `{
                int size;
                char *str;
 
@@ -913,7 +1112,7 @@ redef class Float
                str = malloc(size + 1);
                sprintf(str, "%.*f", (int)nb, recv );
 
-               return new_String_from_cstring( str );
+               return NativeString_to_s( str );
        `}
 end
 
@@ -922,7 +1121,7 @@ redef class Char
        redef fun to_s
        do
                var s = new Buffer.with_capacity(1)
-               s[0] = self
+               s.chars[0] = self
                return s.to_s
        end
 
@@ -967,14 +1166,14 @@ redef class Collection[E]
        fun join(sep: String): String
        do
                if is_empty then return ""
-               
+
                var s = new Buffer # Result
 
                # Concat first item
                var i = iterator
                var e = i.item
                if e != null then s.append(e.to_s)
-               
+
                # Concat other items
                i.next
                while i.is_ok do
@@ -1015,22 +1214,22 @@ redef class Map[K,V]
        fun join(sep: String, couple_sep: String): String
        do
                if is_empty then return ""
-               
+
                var s = new Buffer # Result
 
                # Concat first item
                var i = iterator
                var k = i.key
                var e = i.item
-               if e != null then s.append("{k}{couple_sep}{e}")
-               
+               s.append("{k}{couple_sep}{e or else "<null>"}")
+
                # Concat other items
                i.next
                while i.is_ok do
                        s.append(sep)
                        k = i.key
                        e = i.item
-                       if e != null then s.append("{k}{couple_sep}{e}")
+                       s.append("{k}{couple_sep}{e or else "<null>"}")
                        i.next
                end
                return s.to_s
@@ -1043,10 +1242,12 @@ end
 
 # Native strings are simple C char *
 class NativeString
+       super StringCapable
+
        fun [](index: Int): Char is intern
        fun []=(index: Int, item: Char) is intern
        fun copy_to(dest: NativeString, length: Int, from: Int, to: Int) is intern
-       
+
        # Position of the first nul character.
        fun cstring_length: Int
        do
@@ -1056,6 +1257,26 @@ class NativeString
        end
        fun atoi: Int is intern
        fun atof: Float is extern "atof"
+
+       # Nit string made of the characters found before the first nul character
+       redef fun to_s
+       do
+               var len = cstring_length
+               return to_s_with_length(len)
+       end
+
+       # Nit string on the first `length` characters of `self`.
+       # `self` is handed to the new string as is, it is not copied.
+       # `length` must not be negative.
+       fun to_s_with_length(length: Int): String
+       do
+               assert length >= 0
+               var last = length - 1
+               return new String.with_infos(self, length, 0, last)
+       end
+
+       # Nit string on a fresh copy of the characters found before the first nul character.
+       # The new string does not share `self`.
+       fun to_s_with_copy: String
+       do
+               var length = cstring_length
+               var new_self = calloc_string(length + 1)
+               copy_to(new_self, length, 0, 0)
+               # Terminate the copy explicitly, as every other user of
+               # `calloc_string` in this module does, so that services relying
+               # on the nul character (like `cstring_length`) stay safe.
+               new_self[length] = '\0'
+               return new String.with_infos(new_self, length, 0, length - 1)
+       end
+
 end
 
 # StringCapable objects can create native strings
@@ -1075,7 +1296,7 @@ redef class Sys
        # The name of the program as given by the OS
        fun program_name: String
        do
-               return new String.from_cstring(native_argv(0))
+               return native_argv(0).to_s
        end
 
        # Initialize `args` with the contents of `native_argc` and `native_argv`.
@@ -1085,7 +1306,7 @@ redef class Sys
                var args = new Array[String].with_capacity(0)
                var i = 1
                while i < argc do
-                       args[i-1] = new String.from_cstring(native_argv(i))
+                       args[i-1] = native_argv(i).to_s
                        i += 1
                end
                _args_cache = args