lib: replace `String.has(Char)` with `String.has(Pattern)`
[nit.git] / lib / standard / string.nit
index 74ec7a2..38bd492 100644 (file)
@@ -15,7 +15,8 @@
 module string
 
 import math
-intrude import collection # FIXME should be collection::array
+import collection
+intrude import collection::array
 
 `{
 #include <stdio.h>
@@ -156,13 +157,6 @@ abstract class Text
                return self.chars.iterator
        end
 
-       # Is 'c' contained in self ?
-       #
-       # DEPRECATED : Use self.chars.has instead
-       fun has(c: Char): Bool
-       do
-               return self.chars.has(c)
-       end
 
        # Gets an Array containing the chars of self
        #
@@ -178,7 +172,7 @@ abstract class Text
        # As with substring, a `from` index < 0 will be replaced by 0
        fun substring_from(from: Int): SELFTYPE
        do
-               if from > self.length then return empty
+               if from >= self.length then return empty
                if from < 0 then from = 0
                return substring(from, length - from)
        end
@@ -247,8 +241,8 @@ abstract class Text
                var i = 0
                var neg = false
 
-               for c in self.chars
-               do
+               for j in [0..length[ do
+                       var c = chars[j]
                        var v = c.to_i
                        if v > base then
                                if neg then
@@ -278,12 +272,10 @@ abstract class Text
        fun is_numeric: Bool
        do
                var has_point_or_comma = false
-               for i in self.chars
-               do
-                       if not i.is_numeric
-                       then
-                               if (i == '.' or i == ',') and not has_point_or_comma
-                               then
+               for i in [0..length[ do
+                       var c = chars[i]
+                       if not c.is_numeric then
+                               if (c == '.' or c == ',') and not has_point_or_comma then
                                        has_point_or_comma = true
                                else
                                        return false
@@ -293,6 +285,22 @@ abstract class Text
                return true
        end
 
+       # Is `self` made only of hexadecimal digits (`0-9`, `a-f`, `A-F`)?
+       #
+       #     assert "048bf".is_hex  == true
+       #     assert "ABCDEF".is_hex  == true
+       #     assert "0G".is_hex == false
+       fun is_hex: Bool
+       do
+               for pos in [0..length[ do
+                       var ch = chars[pos]
+                       var is_digit = ch >= '0' and ch <= '9'
+                       var is_low = ch >= 'a' and ch <= 'f'
+                       var is_up = ch >= 'A' and ch <= 'F'
+                       # Stop at the first character outside the three accepted ranges
+                       if not (is_digit or is_low or is_up) then return false
+               end
+               return true
+       end
+
        # Are all letters in `self` upper-case ?
        #
        #     assert "HELLO WORLD".is_upper == true
@@ -301,7 +309,8 @@ abstract class Text
        #     assert "Hello World".is_upper == false
        fun is_upper: Bool
        do
-               for char in self.chars do 
+               for i in [0..length[ do
+                       var char = chars[i]
                        if char.is_lower then return false
                end
                return true
@@ -314,7 +323,8 @@ abstract class Text
        #     assert "Hello World".is_lower == false
        fun is_lower: Bool
        do
-               for char in self.chars do 
+               for i in [0..length[ do
+                       var char = chars[i]
                        if char.is_upper then return false
                end
                return true
@@ -348,7 +358,7 @@ abstract class Text
                        if iter.item.ascii > 32 then break
                        iter.next
                end
-               if iter.index == length then return self.empty
+               if iter.index < 0 then return self.empty
                return self.substring(0, iter.index + 1)
        end
 
@@ -364,7 +374,8 @@ abstract class Text
        do
                var res = new FlatBuffer
                var underscore = false
-               for c in self.chars do
+               for i in [0..length[ do
+                       var c = chars[i]
                        if (c >= 'a' and c <= 'z') or (c >='A' and c <= 'Z') then
                                res.add(c)
                                underscore = false
@@ -397,7 +408,8 @@ abstract class Text
        fun escape_to_c: String
        do
                var b = new FlatBuffer
-               for c in self.chars do
+               for i in [0..length[ do
+                       var c = chars[i]
                        if c == '\n' then
                                b.append("\\n")
                        else if c == '\0' then
@@ -449,7 +461,8 @@ abstract class Text
        do
                var res = new FlatBuffer.with_capacity(self.length)
                var was_slash = false
-               for c in chars do
+               for i in [0..length[ do
+                       var c = chars[i]
                        if not was_slash then
                                if c == '\\' then
                                        was_slash = true
@@ -474,6 +487,95 @@ abstract class Text
                return res.to_s
        end
 
+       # Encode `self` to percent (or URL) encoding
+       #
+       # ASCII letters, digits, `-`, `.`, `_` and `~` are copied as is.
+       # Any other character is written as `%` followed by exactly two
+       # lower-case hex digits, so that the result can be decoded again.
+       #
+       #     assert "aBc09-._~".to_percent_encoding == "aBc09-._~"
+       #     assert "%()< >".to_percent_encoding == "%25%28%29%3c%20%3e"
+       #     assert ".com/post?e=asdf&f=123".to_percent_encoding == ".com%2fpost%3fe%3dasdf%26f%3d123"
+       #     assert "a\nb".to_percent_encoding == "a%0ab"
+       fun to_percent_encoding: String
+       do
+               var buf = new FlatBuffer
+
+               for i in [0..length[ do
+                       var c = chars[i]
+                       if (c >= '0' and c <= '9') or
+                          (c >= 'a' and c <= 'z') or
+                          (c >= 'A' and c <= 'Z') or
+                          c == '-' or c == '.' or
+                          c == '_' or c == '~'
+                       then
+                               buf.add c
+                       else
+                               var code = c.ascii
+                               buf.add '%'
+                               # `to_hex` does not pad: codes under 16 need a leading zero
+                               if code < 16 then buf.add '0'
+                               buf.append code.to_hex
+                       end
+               end
+
+               return buf.to_s
+       end
+
+       # Decode `self` from percent (or URL) encoding to a clear string
+       #
+       # A `%` that is not followed by two hex digits is replaced with '?'.
+       #
+       #     assert "aBc09-._~".from_percent_encoding == "aBc09-._~"
+       #     assert "%25%28%29%3c%20%3e".from_percent_encoding == "%()< >"
+       #     assert ".com%2fpost%3fe%3dasdf%26f%3d123".from_percent_encoding == ".com/post?e=asdf&f=123"
+       #     assert "%25%28%29%3C%20%3E".from_percent_encoding == "%()< >"
+       #     assert "incomplete %".from_percent_encoding == "incomplete ?"
+       #     assert "invalid % usage".from_percent_encoding == "invalid ? usage"
+       fun from_percent_encoding: String
+       do
+               var res = new FlatBuffer
+
+               var pos = 0
+               while pos < length do
+                       var c = chars[pos]
+                       if c != '%' then
+                               # Ordinary character: copied verbatim
+                               res.add c
+                               pos += 1
+                       else if pos + 2 >= length then
+                               # What follows % has been cut off
+                               res.add '?'
+                               pos += 1
+                       else
+                               var digits = substring(pos + 1, 2)
+                               if digits.is_hex then
+                                       # Valid escape: consume the `%` and both digits
+                                       res.add digits.to_hex.ascii
+                                       pos += 3
+                               else
+                                       # What follows a % is not Hex: only the `%` is consumed
+                                       res.add '?'
+                                       pos += 1
+                               end
+                       end
+               end
+
+               return res.to_s
+       end
+
+       # Replace `<`, `>`, `&` and `"` in `self` with the matching HTML entities
+       #
+       #     assert "a&b->\"x\"".html_escape      ==  "a&amp;b-&gt;&quot;x&quot;"
+       fun html_escape: SELFTYPE
+       do
+               var res = new FlatBuffer
+
+               var pos = 0
+               while pos < length do
+                       var ch = chars[pos]
+                       pos += 1
+                       if ch == '"' then
+                               res.append "&quot;"
+                       else if ch == '>' then
+                               res.append "&gt;"
+                       else if ch == '<' then
+                               res.append "&lt;"
+                       else if ch == '&' then
+                               res.append "&amp;"
+                       else res.add ch
+               end
+
+               return res.to_s
+       end
+
        # Equality of text
        # Two pieces of text are equal if they have the same characters in the same order.
        #
@@ -533,7 +635,8 @@ abstract class Text
                        # djb2 hash algorithm
                        var h = 5381
 
-                       for char in self.chars do
+                       for i in [0..length[ do
+                               var char = chars[i]
                                h = h.lshift(5) + h + char.ascii
                        end
 
@@ -659,7 +762,8 @@ abstract class String
                var new_str = new FlatBuffer.with_capacity(self.length)
                var is_first_char = true
 
-               for char in self.chars do
+               for i in [0..length[ do
+                       var char = chars[i]
                        if is_first_char then 
                                new_str.add(char.to_lower)
                                is_first_char = false
@@ -697,7 +801,8 @@ abstract class String
                var is_first_char = true
                var follows_us = false
 
-               for char in self.chars do
+               for i in [0..length[ do
+                       var char = chars[i]
                        if is_first_char then
                                new_str.add(char)
                                is_first_char = false
@@ -745,6 +850,15 @@ class FlatString
 
        redef var chars: SequenceRead[Char] = new FlatStringCharView(self)
 
+       redef fun [](index)
+       do
+               # `index` is relative to the string, `items` is read at `index + index_from`
+               var pos = index + index_from
+               # The index is valid if it is not negative and `pos` does not go past `index_to`
+               assert index >= 0
+               assert pos <= index_to
+               return items[pos]
+       end
+
        ################################################
        #       AbstractString specific methods        #
        ################################################
@@ -941,8 +1055,9 @@ class FlatString
                        s.items.copy_to(target_string, its_length, 0, my_length)
                else
                        var curr_pos = my_length
-                       for i in s.chars do
-                               target_string[curr_pos] = i
+                       for i in [0..s.length[ do
+                               var c = s.chars[i]
+                               target_string[curr_pos] = c
                                curr_pos += 1
                        end
                end
@@ -1015,7 +1130,7 @@ private class FlatStringReverseIterator
                curr_pos = pos + tgt.index_from
        end
 
-       redef fun is_ok do return curr_pos >= 0
+       redef fun is_ok do return curr_pos >= target.index_from
 
        redef fun item do return target_items[curr_pos]
 
@@ -1166,6 +1281,13 @@ class FlatBuffer
 
        redef fun substrings do return new FlatSubstringsIter(self)
 
+       redef fun [](index)
+       do
+               # Reject any index outside `[0..length[` before reading `items`
+               var upper = length
+               assert index >= 0
+               assert index < upper
+               return items[index]
+       end
+
        redef fun []=(index, item)
        do
                is_dirty = true
@@ -1234,8 +1356,9 @@ class FlatBuffer
                        s.items.copy_to(items, length, 0, 0)
                else
                        var curr_pos = 0
-                       for i in s.chars do
-                               items[curr_pos] = i
+                       for i in [0..s.length[ do
+                               var c = s.chars[i]
+                               items[curr_pos] = c
                                curr_pos += 1
                        end
                end
@@ -1263,8 +1386,9 @@ class FlatBuffer
                        s.items.copy_to(items, sl, 0, length)
                else
                        var curr_pos = self.length
-                       for i in s.chars do
-                               items[curr_pos] = i
+                       for i in [0..s.length[ do
+                               var c = s.chars[i]
+                               items[curr_pos] = c
                                curr_pos += 1
                        end
                end
@@ -1469,11 +1593,6 @@ redef class Object
        do
                return "{class_name}:#{object_id.to_hex}"
        end
-
-       protected fun args: Sequence[String]
-       do
-               return sys.args
-       end
 end
 
 redef class Bool
@@ -1524,15 +1643,14 @@ redef class Int
        end
 
        # C function to convert a Nit Int to a NativeString (char*)
-       private fun native_int_to_s(len: Int): NativeString is extern "native_int_to_s"
+       private fun native_int_to_s: NativeString is extern "native_int_to_s"
 
        # return displayable int in base 10 and signed
        #
        #     assert 1.to_s            == "1"
        #     assert (-123).to_s       == "-123"
        redef fun to_s do
-               var len = digit_count(10)
-               return native_int_to_s(len).to_s_with_length(len)
+               # The extern helper builds the NativeString; `to_s` then turns it into a String
+               return native_int_to_s.to_s
        end
 
        # return displayable int in hexadecimal
@@ -1713,18 +1831,51 @@ redef class Collection[E]
 end
 
 redef class Array[E]
+
        # Fast implementation
+       #
+       # Concatenates the `to_s` of every non-null element, in order.
+       # The result is built in two passes: the first collects the strings and
+       # their total length, the second copies them into a single NativeString.
        redef fun to_s
        do
-               var s = new FlatBuffer
-               var i = 0
                var l = length
+               # Shortcuts for the empty array and the single-element array
+               if l == 0 then return ""
+               if l == 1 then if self[0] == null then return "" else return self[0].to_s
+               var its = _items
+               # `na` holds the string of each non-null element, `mypos` counts them
+               var na = new NativeArray[String](l)
+               var i = 0
+               # `sl` accumulates the total length of the collected strings
+               var sl = 0
+               var mypos = 0
                while i < l do
-                       var e = self[i]
-                       if e != null then s.append(e.to_s)
+                       var itsi = its[i]
+                       # Null elements are skipped
+                       if itsi == null then
+                               i += 1
+                               continue
+                       end
+                       var tmp = itsi.to_s
+                       sl += tmp.length
+                       na[mypos] = tmp
                        i += 1
+                       mypos += 1
                end
-               return s.to_s
+               # One extra slot is allocated for the trailing '\0'
+               var ns = new NativeString(sl + 1)
+               ns[sl] = '\0'
+               i = 0
+               # `off` is the position in `ns` where the next string is copied
+               var off = 0
+               while i < mypos do
+                       var tmp = na[i]
+                       var tpl = tmp.length
+                       if tmp isa FlatString then
+                               # A FlatString is copied directly from its `items`, starting at `index_from`
+                               tmp.items.copy_to(ns, tpl, tmp.index_from, off)
+                               off += tpl
+                       else
+                               # Other strings are copied one substring at a time; each is cast to FlatString
+                               for j in tmp.substrings do
+                                       var s = j.as(FlatString)
+                                       var slen = s.length
+                                       s.items.copy_to(ns, slen, s.index_from, off)
+                                       off += slen
+                               end
+                       end
+                       i += 1
+               end
+               return ns.to_s_with_length(sl)
        end
 end
 
@@ -1769,7 +1920,8 @@ end
 # Native strings are simple C char *
 extern class NativeString `{ char* `}
        super StringCapable
-
+       # Creates a new NativeString with a capacity of `length`
+       new(length: Int) is intern
        fun [](index: Int): Char is intern
        fun []=(index: Int, item: Char) is intern
        fun copy_to(dest: NativeString, length: Int, from: Int, to: Int) is intern
@@ -1810,9 +1962,10 @@ interface StringCapable
 end
 
 redef class Sys
-       var _args_cache: nullable Sequence[String]
+       private var args_cache: nullable Sequence[String]
 
-       redef fun args: Sequence[String]
+       # The arguments of the program as given by the OS
+       fun program_args: Sequence[String]
        do
                if _args_cache == null then init_args
                return _args_cache.as(not null)
@@ -1824,7 +1977,7 @@ redef class Sys
                return native_argv(0).to_s
        end
 
-       # Initialize `args` with the contents of `native_argc` and `native_argv`.
+       # Initialize `program_args` with the contents of `native_argc` and `native_argv`.
        private fun init_args
        do
                var argc = native_argc
@@ -1888,3 +2041,9 @@ end
 #     alpha_comparator.sort(a)
 #     assert a == [1, 10, 2, 20, 3]
 fun alpha_comparator: Comparator[Object] do return once new AlphaComparator
+
+# The arguments of the program as given by the OS (shortcut for `sys.program_args`)
+fun args: Sequence[String] do return sys.program_args