stdlib/strings: Access iterator through its position constructor.
[nit.git] / lib / standard / string.nit
index f62f988..524d4d4 100644 (file)
@@ -5,17 +5,17 @@
 #
 # This file is free software, which comes along with NIT.  This software is
 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
-# without  even  the implied warranty of  MERCHANTABILITY or  FITNESS FOR A 
+# without  even  the implied warranty of  MERCHANTABILITY or  FITNESS FOR A
 # PARTICULAR PURPOSE.  You can modify it is you want,  provided this header
 # is kept unaltered, and a notification of the changes is added.
 # You  are  allowed  to  redistribute it and sell it, alone or is a part of
 # another product.
 
 # Basic manipulations of strings of characters
-package string
+module string
 
+import math
 intrude import collection # FIXME should be collection::array
-import hash
 
 `{
 #include <stdio.h>
@@ -31,6 +31,8 @@ abstract class AbstractString
 
        readable private var _items: NativeString
 
+       fun chars: StringCharView is abstract
+
        # Access a character at `index` in the string.
        #
        #     assert "abcd"[2]         == 'c'
@@ -38,10 +40,10 @@ abstract class AbstractString
 
        # Create a substring.
        #
-       #     assert "abcd".substring(1, 2)         ==  "bc"
-       #     assert "abcd".substring(-1, )         ==  "a"
-       #     assert "abcd".substring(1, 0)         ==  ""
-       #     assert "abcd".substring(2, 5)         ==  "cd"
+       #     assert "abcd".substring(1, 2)      ==  "bc"
+       #     assert "abcd".substring(-1, 2)     ==  "a"
+       #     assert "abcd".substring(1, 0)      ==  ""
+       #     assert "abcd".substring(2, 5)      ==  "cd"
        #
        # A `from` index < 0 will be replaced by 0.
        # Unless a `count` value is > 0 at the same time.
@@ -55,7 +57,7 @@ abstract class AbstractString
                if from < count then
                        var r = new Buffer.with_capacity(count - from)
                        while from < count do
-                               r.push(_items[from])
+                               r.chars.push(_items[from])
                                from += 1
                        end
                        return r.to_s
@@ -66,9 +68,9 @@ abstract class AbstractString
 
        # Create a substring from `self` beginning at the `from` position
        #
-       #     assert "abcd".substring_from(1)        ==  "bcd"
-       #     assert "abcd".substring_from(-1)       ==  "abcd"
-       #     assert "abcd".substring_from(2)       ==  "cd"
+       #     assert "abcd".substring_from(1)    ==  "bcd"
+       #     assert "abcd".substring_from(-1)   ==  "abcd"
+       #     assert "abcd".substring_from(2)    ==  "cd"
        #
        # As with substring, a `from` index < 0 will be replaced by 0
        fun substring_from(from: Int): String
@@ -100,8 +102,9 @@ abstract class AbstractString
 
        # Is this string prefixed by `prefix`?
        #
-       #     assert "abcd".has_prefix("ab")         ==  true
-       #     assert "abcbc".has_prefix("bc")        ==  false
+       #     assert "abcd".has_prefix("ab")           ==  true
+       #     assert "abcbc".has_prefix("bc")          ==  false
+       #     assert "ab".has_prefix("abcd")           ==  false
        fun has_prefix(prefix: String): Bool do return has_substring(prefix,0)
 
        # Is this string suffixed by `suffix`?
@@ -124,7 +127,7 @@ abstract class AbstractString
        #
        #     assert "123".to_f        == 123.0
        #     assert "-1".to_f         == -1.0
-       #     assert "-1.2e-3".to_f    == -1.2e-3
+       #     assert "-1.2e-3".to_f    == -0.0012
        fun to_f: Float
        do
                # Shortcut
@@ -142,7 +145,7 @@ abstract class AbstractString
                var i = 0
                var neg = false
 
-               for c in self
+               for c in self.chars
                do
                        var v = c.to_i
                        if v > base then
@@ -173,7 +176,7 @@ abstract class AbstractString
        fun is_numeric: Bool
        do
                var has_point_or_comma = false
-               for i in self
+               for i in self.chars
                do
                        if not i.is_numeric
                        then
@@ -194,7 +197,7 @@ abstract class AbstractString
        fun to_upper: String
        do
                var s = new Buffer.with_capacity(length)
-               for i in self do s.add(i.to_upper)
+               for i in self.chars do s.add(i.to_upper)
                return s.to_s
        end
 
@@ -204,7 +207,7 @@ abstract class AbstractString
        fun to_lower : String
        do
                var s = new Buffer.with_capacity(length)
-               for i in self do s.add(i.to_lower)
+               for i in self.chars do s.add(i.to_lower)
                return s.to_s
        end
 
@@ -215,18 +218,18 @@ abstract class AbstractString
        #     assert "\na\nb\tc\t".trim          == "a\nb\tc"
        fun trim: String
        do
-               if self._length == 0 then return self.to_s
+               if self.length == 0 then return self.to_s
                # find position of the first non white space char (ascii < 32) from the start of the string
                var start_pos = 0
-               while self[start_pos].ascii <= 32 do
+               while self.chars[start_pos].ascii <= 32 do
                        start_pos += 1
-                       if start_pos == _length then return ""
+                       if start_pos == length then return ""
                end
                # find position of the first non white space char from the end of the string
                var end_pos = length - 1
-               while self[end_pos].ascii <= 32 do
+               while self.chars[end_pos].ascii <= 32 do
                        end_pos -= 1
-                       if end_pos == start_pos then return self[start_pos].to_s
+                       if end_pos == start_pos then return self.chars[start_pos].to_s
                end
                return self.substring(start_pos, end_pos - start_pos + 1)
        end
@@ -239,6 +242,164 @@ abstract class AbstractString
                        i += 1
                end
        end
+
+       # Mangle a string to be a unique string only made of alphanumeric characters
+       #
+       # The characters of `self` are processed from left to right:
+       #
+       # * a letter (`a`-`z`, `A`-`Z`) or a digit (`0`-`9`) is copied as is
+       # * an underscore is copied as is
+       # * any other character is replaced by `_`, followed by the decimal
+       #   value of its `ascii` code, followed by `d`
+       #
+       # Moreover, when the character that follows an underscore of `self` is
+       # not a letter, the decimal value of `'_'.ascii` followed by `d` is
+       # inserted just before the output of that character.
+       #
+       # Note that the result may therefore contain underscores too.
+       fun to_cmangle: String
+       do
+               var res = new Buffer
+               # Was the previous character of `self` an underscore?
+               var underscore = false
+               for c in self.chars do
+                       # Letters never need mangling and cancel a pending underscore
+                       if (c >= 'a' and c <= 'z') or (c >='A' and c <= 'Z') then
+                               res.add(c)
+                               underscore = false
+                               continue
+                       end
+                       # The previous underscore is followed by a non-letter:
+                       # complete it with its own code before handling `c`
+                       if underscore then
+                               res.append('_'.ascii.to_s)
+                               res.add('d')
+                       end
+                       if c >= '0' and c <= '9' then
+                               res.add(c)
+                               underscore = false
+                       else if c == '_' then
+                               # Copied as is, but remembered for the next iteration
+                               res.add(c)
+                               underscore = true
+                       else
+                               # Any other character: `_` + decimal code + `d`
+                               res.add('_')
+                               res.append(c.ascii.to_s)
+                               res.add('d')
+                               underscore = false
+                       end
+               end
+               return res.to_s
+       end
+
+       # Escape " \ ' and non printable characters using the rules of literal C strings and characters
+       #
+       #     assert "abAB12<>&".escape_to_c         == "abAB12<>&"
+       #     assert "\n\"'\\".escape_to_c         == "\\n\\\"\\'\\\\"
+       #
+       # The other characters whose `ascii` value is below 32 are written as a
+       # backslash followed by three octal digits, so that a digit that follows
+       # in `self` cannot be read as a part of the escape sequence.
+       #
+       #     assert "\t1".escape_to_c         == "\\0111"
+       fun escape_to_c: String
+       do
+               var b = new Buffer
+               for c in self.chars do
+                       if c == '\n' then
+                               b.append("\\n")
+                       else if c == '\0' then
+                               # Kept as the short form `\0`: this is the form that
+                               # `unescape_nit` is able to decode.
+                               b.append("\\0")
+                       else if c == '"' then
+                               b.append("\\\"")
+                       else if c == '\'' then
+                               b.append("\\\'")
+                       else if c == '\\' then
+                               b.append("\\\\")
+                       else if c.ascii < 32 then
+                               # A C octal escape takes up to three digits: pad with
+                               # zeros so the sequence always ends where intended.
+                               var oct = c.ascii.to_base(8, false)
+                               b.add('\\')
+                               if oct.length == 1 then
+                                       b.append("00")
+                               else if oct.length == 2 then
+                                       b.add('0')
+                               end
+                               b.append(oct)
+                       else
+                               b.add(c)
+                       end
+               end
+               return b.to_s
+       end
+
+       # Escape additional characters
+       # The result might not be legal in C but can be used in other languages
+       #
+       # `self` is first escaped with `escape_to_c`, then each character of
+       # that result that is present in `chars` is prefixed with a backslash.
+       #
+       #     assert "ab|\{\}".escape_more_to_c("|\{\}") == "ab\\|\\\{\\\}"
+       fun escape_more_to_c(chars: String): String
+       do
+               var b = new Buffer
+               # Here `chars` is the parameter (the characters to escape), hence
+               # `chars.chars` to search in its character view.
+               for c in escape_to_c.chars do
+                       if chars.chars.has(c) then
+                               b.add('\\')
+                       end
+                       b.add(c)
+               end
+               return b.to_s
+       end
+
+       # Escape like `escape_to_c` and, in addition, escape the curly braces
+       #
+       #     assert "\n\"'\\\{\}".escape_to_nit      == "\\n\\\"\\'\\\\\\\{\\\}"
+       fun escape_to_nit: String
+       do
+               return escape_more_to_c("\{\}")
+       end
+
+       # Return a string where Nit escape sequences are transformed.
+       #
+       # The sequences `\n`, `\r`, `\t` and `\0` are replaced by the character
+       # they denote. Any other escaped character is kept as is, without its
+       # backslash. A backslash that ends the string is dropped.
+       #
+       # Example:
+       #     var s = "\\n"
+       #     assert s.length        ==  2
+       #     var u = s.unescape_nit
+       #     assert u.length        ==  1
+       #     assert u.chars[0].ascii      ==  10 # (the ASCII value of the "new line" character)
+       fun unescape_nit: String
+       do
+               var res = new Buffer.with_capacity(self.length)
+               # Was the previous character an unprocessed backslash?
+               var was_slash = false
+               for c in self.chars do
+                       if not was_slash then
+                               if c == '\\' then
+                                       was_slash = true
+                               else
+                                       res.add(c)
+                               end
+                               continue
+                       end
+                       # `c` is the character that follows a backslash
+                       was_slash = false
+                       if c == 'n' then
+                               res.add('\n')
+                       else if c == 'r' then
+                               res.add('\r')
+                       else if c == 't' then
+                               res.add('\t')
+                       else if c == '0' then
+                               res.add('\0')
+                       else
+                               res.add(c)
+                       end
+               end
+               return res.to_s
+       end
+end
+
+# Common ancestor of the `SequenceRead[Char]` views
+# offered on String and Buffer objects
+abstract class StringCharView
+       super SequenceRead[Char]
+
+       # Type of the string the view is built on
+       type SELFTYPE: AbstractString
+
+       # String whose characters are exposed by the view
+       private var target: SELFTYPE
+
+       # Build a view on the characters of `tgt`
+       private init(tgt: SELFTYPE)
+       do
+               target = tgt
+       end
+
+       # Emptiness is the one of the viewed string
+       redef fun is_empty
+       do
+               return target.is_empty
+       end
+
+       # Length is the one of the viewed string
+       redef fun length
+       do
+               return target.length
+       end
+
+       # Iterate from the first character
+       redef fun iterator: IndexedIterator[Char]
+       do
+               return self.iterator_from(0)
+       end
+
+       # Iterator on the characters of the view, starting at the position `pos`
+       fun iterator_from(pos: Int): IndexedIterator[Char] is abstract
+
+       # Is `c` one of the characters of the view? (linear search)
+       redef fun has(c: Char): Bool
+       do
+               var it = self.iterator
+               while it.is_ok do
+                       if it.item == c then return true
+                       it.next
+               end
+               return false
+       end
+
+end
+
+# View on Buffer objects, extends Sequence
+# for mutation operations
+#
+# It declares no service of its own: it only combines `StringCharView`
+# with `Sequence[Char]` and restricts the viewed string to a `Buffer`.
+abstract class BufferCharView
+       super StringCharView
+       super Sequence[Char]
+
+       # The viewed string is a Buffer
+       redef type SELFTYPE: Buffer
+
 end
 
 # Immutable strings of characters.
@@ -255,6 +416,8 @@ class String
        # Indes in _items of the last item of the string
        readable var _index_to: Int
 
+       redef var chars: StringCharView = new FlatStringCharView(self)
+
        ################################################
        #       AbstractString specific methods        #
        ################################################
@@ -279,11 +442,13 @@ class String
 
                var realFrom = _index_from + from
 
-               if (realFrom + count) > _index_to then return new String.from_substring(realFrom, _index_to, _items)
+               if (realFrom + count) > _index_to then return new String.with_infos(_items, _index_to - realFrom + 1, realFrom, _index_to)
 
                if count == 0 then return ""
 
-               return new String.from_substring(realFrom, realFrom + count - 1, _items)
+               var to = realFrom + count - 1
+
+               return new String.with_infos(_items, to - realFrom + 1, realFrom, to)
        end
 
        redef fun substring_from(from: Int): String
@@ -391,49 +556,6 @@ class String
        #              String Specific Methods           #
        ##################################################
 
-       # Creates a String object as a substring of another String
-       #
-       # From : index to start at
-       #
-       # To : Index to stop at (from + count -1)
-       #
-       private init from_substring(from: Int, to: Int, internalString: NativeString)
-       do
-               _items = internalString
-               _index_from = from
-               _index_to = to
-               _length = to - from + 1
-       end
-
-       # Create a new string from a given char *.
-       private init with_native(nat: NativeString, size: Int)
-       do
-               assert size >= 0
-               _items = nat
-               _length = size
-               _index_from = 0
-               _index_to = _length - 1
-       end
-
-       # Create a new string from a null terminated char *.
-       private init from_cstring(str: NativeString)
-       do
-               with_native(str,str.cstring_length)
-       end
-
-       # Creates a new Nit String from an existing CString
-       # Pretty much equals to from_cstring but copies instead
-       # of passing a reference
-       # Avoids manual/automatic dealloc problems when dealing with native C code
-       private init copy_from_native(str: NativeString)
-       do
-               var temp_length = str.cstring_length
-               var new_str = calloc_string(temp_length + 1)
-               str.copy_to(new_str, temp_length, 0, 0)
-               new_str[temp_length] = '\0'
-               with_native(new_str, temp_length)
-       end
-
        private init with_infos(items: NativeString, len: Int, from: Int, to: Int)
        do
                self._items = items
@@ -456,7 +578,7 @@ class String
 
        redef fun ==(other)
        do
-               if not other isa String or other is null then return false
+               if not other isa String then return false
 
                if self.object_id == other.object_id then return true
 
@@ -483,7 +605,7 @@ class String
 
        # The comparison between two strings is done on a lexicographical basis
        #
-       #     assert "aa" < "b"      ==  true
+       #     assert ("aa" < "b")      ==  true
        redef fun <(other)
        do
                if self.object_id == other.object_id then return false
@@ -589,6 +711,50 @@ class String
        end
 end
 
+# Iterator on the characters of a String that reads its native storage
+private class FlatStringIterator
+       super IndexedIterator[Char]
+
+       # String that is iterated
+       var target: String
+
+       # `items` of `target`, kept at hand for the accesses
+       var target_items: NativeString
+
+       # Current position, as an index in `target_items`
+       var curr_pos: Int
+
+       # Iterate on `tgt`, starting at the position `pos` relative to its first character
+       init with_pos(tgt: String, pos: Int)
+       do
+               target = tgt
+               target_items = tgt.items
+               curr_pos = tgt.index_from + pos
+       end
+
+       # Valid while the position did not go past `index_to` of the string
+       redef fun is_ok
+       do
+               return curr_pos <= target.index_to
+       end
+
+       redef fun item
+       do
+               return target_items[curr_pos]
+       end
+
+       redef fun next
+       do
+               curr_pos += 1
+       end
+
+       # Position relative to the first character of the string
+       redef fun index
+       do
+               return curr_pos - target.index_from
+       end
+
+end
+
+# View on the characters of a String
+private class FlatStringCharView
+       super StringCharView
+
+       redef type SELFTYPE: String
+
+       # Character at `index`, relative to the first character of the string
+       redef fun [](index)
+       do
+               # The index must be positive and, once shifted by `_index_from`,
+               # must not go past `_index_to`
+               assert index >= 0
+               var real_pos = index + target._index_from
+               assert real_pos <= target._index_to
+               return target._items[real_pos]
+       end
+
+       redef fun iterator_from(start)
+       do
+               return new FlatStringIterator.with_pos(target, start)
+       end
+
+end
+
 # Mutable strings of characters.
 class Buffer
        super AbstractString
@@ -598,6 +764,8 @@ class Buffer
 
        redef type OTHER: String
 
+       redef var chars: BufferCharView = new FlatBufferCharView(self)
+
        redef fun []=(index, item)
        do
                if index == length then
@@ -656,8 +824,8 @@ class Buffer
                var l1 = length
                var l2 = s.length
                while i < l1 and i < l2 do
-                       var c1 = self[i].ascii
-                       var c2 = s[i].ascii
+                       var c1 = self.chars[i].ascii
+                       var c2 = s.chars[i].ascii
                        if c1 < c2 then
                                return true
                        else if c2 < c1 then
@@ -698,7 +866,7 @@ class Buffer
 
        redef fun ==(o)
        do
-               if not o isa Buffer or o is null then return false
+               if not o isa Buffer then return false
                var l = length
                if o.length != l then return false
                var i = 0
@@ -714,6 +882,76 @@ class Buffer
        readable private var _capacity: Int
 end
 
+# Mutable view on the characters of a Buffer
+#
+# All the mutations are forwarded to the viewed buffer.
+private class FlatBufferCharView
+       super BufferCharView
+       super StringCapable
+
+       redef type SELFTYPE: Buffer
+
+       # Character at `index`
+       #
+       # `index` must designate an existing character of the buffer.
+       redef fun [](index)
+       do
+               # Do not read the native storage outside of the used part
+               assert index >= 0 and index < length
+               return target._items[index]
+       end
+
+       # Replace the character at `index` by `item`
+       #
+       # `index == length` is accepted and adds `item` at the end.
+       redef fun []=(index, item)
+       do
+               assert index >= 0 and index <= length
+               if index == length then
+                       add(item)
+                       return
+               end
+               target._items[index] = item
+       end
+
+       # Add `c` at the end of the buffer
+       redef fun push(c)
+       do
+               target.add(c)
+       end
+
+       # Add `c` at the end of the buffer
+       redef fun add(c)
+       do
+               target.add(c)
+       end
+
+       # Ask the buffer to enlarge its capacity to `cap`
+       fun enlarge(cap: Int)
+       do
+               target.enlarge(cap)
+       end
+
+       # Add all the characters of `s` at the end of the buffer
+       redef fun append(s)
+       do
+               # Reserve the room for the whole result once, then copy
+               var needed = target.length + s.length
+               if target.capacity < needed then enlarge(needed)
+               for c in s do target.add(c)
+       end
+
+       redef fun iterator_from(pos) do return new FlatBufferIterator.with_pos(target, pos)
+
+end
+
+# Iterator on the characters of a Buffer that reads its native storage
+private class FlatBufferIterator
+       super IndexedIterator[Char]
+
+       # Buffer that is iterated
+       var target: Buffer
+
+       # `items` of `target`, as they were when the iterator was created
+       var target_items: NativeString
+
+       # Current position in the buffer
+       var curr_pos: Int
+
+       # Iterate on `tgt`, starting at the position `pos`
+       init with_pos(tgt: Buffer, pos: Int)
+       do
+               target = tgt
+               target_items = tgt.items
+               curr_pos = pos
+       end
+
+       redef fun index
+       do
+               return curr_pos
+       end
+
+       # Valid while the position is below the current length of the buffer
+       redef fun is_ok
+       do
+               return curr_pos < target.length
+       end
+
+       redef fun item
+       do
+               return target_items[curr_pos]
+       end
+
+       redef fun next
+       do
+               curr_pos += 1
+       end
+
+end
+
 ###############################################################################
 # Refinement                                                                  #
 ###############################################################################
@@ -754,13 +992,13 @@ redef class Bool
        #     assert true.to_s         == "true"
        #     assert false.to_s        == "false"
        redef fun to_s
-       do 
-               if self then 
-                       return once "true" 
-               else 
-                       return once "false" 
+       do
+               if self then
+                       return once "true"
+               else
+                       return once "false"
                end
-       end   
+       end
 end
 
 redef class Int
@@ -772,17 +1010,17 @@ redef class Int
                # Sign
                if self < 0 then
                        n = - self
-                       s[0] = '-'
+                       s.chars[0] = '-'
                else if self == 0 then
-                       s[0] = '0'
+                       s.chars[0] = '0'
                        return
                else
                        n = self
                end
                # Fill digits
                var pos = digit_count(base) - 1
-               while pos >= 0 and n > 0 do 
-                       s[pos] = (n % base).to_c
+               while pos >= 0 and n > 0 do
+                       s.chars[pos] = (n % base).to_c
                        n = n / base # /
                        pos -= 1
                end
@@ -817,10 +1055,11 @@ redef class Float
        # Pretty print self, print needoed decimals up to a max of 3.
        redef fun to_s do
                var str = to_precision( 3 )
+               if is_inf != 0 or is_nan then return str
                var len = str.length
                for i in [0..len-1] do
                        var j = len-1-i
-                       var c = str[j]
+                       var c = str.chars[j]
                        if c == '0' then
                                continue
                        else if c == '.' then
@@ -835,6 +1074,15 @@ redef class Float
        # `self` representation with `nb` digits after the '.'.
        fun to_precision(nb: Int): String
        do
+               if is_nan then return "nan"
+
+               var isinf = self.is_inf
+               if isinf == 1 then
+                       return "inf"
+               else if isinf == -1 then
+                       return  "-inf"
+               end
+
                if nb == 0 then return self.to_i.to_s
                var f = self
                for i in [0..nb[ do f = f * 10.0
@@ -856,7 +1104,7 @@ redef class Float
                end
        end
 
-       fun to_precision_native(nb: Int): String import NativeString::to_s `{
+       fun to_precision_native(nb: Int): String import NativeString.to_s `{
                int size;
                char *str;
 
@@ -873,7 +1121,7 @@ redef class Char
        redef fun to_s
        do
                var s = new Buffer.with_capacity(1)
-               s[0] = self
+               s.chars[0] = self
                return s.to_s
        end
 
@@ -918,14 +1166,14 @@ redef class Collection[E]
        fun join(sep: String): String
        do
                if is_empty then return ""
-               
+
                var s = new Buffer # Result
 
                # Concat first item
                var i = iterator
                var e = i.item
                if e != null then s.append(e.to_s)
-               
+
                # Concat other items
                i.next
                while i.is_ok do
@@ -966,22 +1214,22 @@ redef class Map[K,V]
        fun join(sep: String, couple_sep: String): String
        do
                if is_empty then return ""
-               
+
                var s = new Buffer # Result
 
                # Concat first item
                var i = iterator
                var k = i.key
                var e = i.item
-               if e != null then s.append("{k}{couple_sep}{e}")
-               
+               s.append("{k}{couple_sep}{e or else "<null>"}")
+
                # Concat other items
                i.next
                while i.is_ok do
                        s.append(sep)
                        k = i.key
                        e = i.item
-                       if e != null then s.append("{k}{couple_sep}{e}")
+                       s.append("{k}{couple_sep}{e or else "<null>"}")
                        i.next
                end
                return s.to_s