X-Git-Url: http://nitlanguage.org diff --git a/lib/standard/string.nit b/lib/standard/string.nit index 6089102..887ac86 100644 --- a/lib/standard/string.nit +++ b/lib/standard/string.nit @@ -11,12 +11,16 @@ # You are allowed to redistribute it and sell it, alone or is a part of # another product. -# This module is about character strings. +# Basic manipulations of strings of characters package string intrude import collection # FIXME should be collection::array import hash +`{ +#include +`} + ############################################################################### # String # ############################################################################### @@ -53,7 +57,7 @@ abstract class AbstractString end end - # Create a substring with the string beginning at the 'from' position + # Create a substring from `self' beginning at the 'from' position # # "abcd".substring(1) # --> "bcd" # "abcd".substring(-1) # --> "abcd" @@ -64,10 +68,10 @@ abstract class AbstractString return substring(from, length - from) end - # is this string a substring of the 'of' string from pos 'pos' + # Does self have a substring 'str' starting from position 'pos # - # "bc".is_substring("abcd",1) # --> true - # "bc".is_substring("abcd",2) # --> false + # "abcd".has_substring("bc",1) # --> true + # "abcd".has_substring("bc",2) # --> false fun has_substring(str: String, pos: Int): Bool do var itsindex = str.length - 1 @@ -75,7 +79,9 @@ abstract class AbstractString var myitems = _items var itsitems = str._items if myindex > length or itsindex > myindex then return false - while itsindex >= 0 do + var its_index_from = str._index_from + itsindex += its_index_from + while itsindex >= its_index_from do if myitems[myindex] != itsitems[itsindex] then return false myindex -= 1 itsindex -= 1 @@ -102,6 +108,13 @@ abstract class AbstractString return to_s.to_cstring.atoi end + # If `self' contains a float, return the corresponding float + fun to_f: Float + do + # Shortcut + return to_s.to_cstring.atof + end + # If `self' contains only digits and alpha <= 'f', return the corresponding integer. fun to_hex: Int do return a_to(16) @@ -133,7 +146,26 @@ abstract class AbstractString end end - # String to upper case + # Returns true if the string contains only Numeric values (and one "," or one "." character) + fun is_numeric: Bool + do + var has_point_or_comma = false + for i in self + do + if not i.is_numeric + then + if (i == '.' or i == ',') and not has_point_or_comma + then + has_point_or_comma = true + else + return false + end + end + end + return true + end + + # A upper case version of `self' fun to_upper: String do var s = new Buffer.with_capacity(length) @@ -141,7 +173,7 @@ abstract class AbstractString return s.to_s end - # String to lower case + # A lower case version of `self' fun to_lower : String do var s = new Buffer.with_capacity(length) @@ -149,6 +181,25 @@ abstract class AbstractString return s.to_s end + # Trims trailing and preceding white spaces + # A whitespace is defined as any character which ascii value is less than or equal to 32 + fun trim: String + do + if self._length == 0 then return self.to_s + # find position of the first non white space char (ascii < 32) from the start of the string + var start_pos = 0 + while self[start_pos].ascii <= 32 do + start_pos += 1 + if start_pos == _length then return "" + end + # find position of the first non white space char from the end of the string + var end_pos = length - 1 + while self[end_pos].ascii <= 32 do + end_pos -= 1 + if end_pos == start_pos then return self[start_pos].to_s + end + return self.substring(start_pos, end_pos - start_pos + 1) + end redef fun output do @@ -160,93 +211,341 @@ abstract class AbstractString end end - +# Immutable strings of characters. class String super Comparable super AbstractString + super StringCapable redef type OTHER: String + # Index in _items of the start of the string + readable var _index_from: Int + + # Indes in _items of the last item of the string + readable var _index_to: Int + + ################################################ + # AbstractString specific methods # + ################################################ + + # Access a character at index in String + # + redef fun [](index) do + assert index >= 0 + # Check that the index (+ index_from) is not larger than indexTo + # In other terms, if the index is valid + assert (index + _index_from) <= _index_to + return _items[index + _index_from] + end + + # Create a substring. + # + # "abcd".substring(1, 2) # --> "bc" + # "abcd".substring(-1, 2) # --> "a" + # "abcd".substring(1, 0) # --> "" + # "abcd".substring(2, 5) # --> "cd" + # + # A "from" index < 0 will be replaced by 0 + # Unless a count value is > 0 at the same time + # In this case, from += count and count -= from + # + redef fun substring(from: Int, count: Int): String + do + assert count >= 0 + + if from < 0 then + count += from + if count < 0 then count = 0 + from = 0 + end + + var realFrom = _index_from + from + + if (realFrom + count) > _index_to then return new String.from_substring(realFrom, _index_to, _items) + + if count == 0 then return "" + + return new String.from_substring(realFrom, realFrom + count - 1, _items) + end + + # Create a substring from `self' beginning at the 'from' position + # + # "abcd".substring_from(1) # --> "bcd" + # "abcd".substring_from(-1) # --> "abcd" + # "abcd".substring_from(2) # --> "cd" + # + # As with substring, a "from" index < 0 will be replaced by 0 + # + redef fun substring_from(from: Int): String + do + if from > _length then return "" + if from < 0 then from = 0 + return substring(from, _length) + end + + # Does self have a substring 'str' starting from position 'pos + # + # "abcd".has_substring("bc",1) # --> true + # "abcd".has_substring("bc",2) # --> false + redef fun has_substring(str: String, pos: Int): Bool + do + var itsindex = str._length - 1 + + var myindex = pos + itsindex + var myitems = _items + + var itsitems = str._items + + if myindex > _length or itsindex > myindex then return false + + var itsindexfrom = str.index_from + itsindex += itsindexfrom + myindex += index_from + + while itsindex >= itsindexfrom do + if myitems[myindex] != itsitems[itsindex] then return false + myindex -= 1 + itsindex -= 1 + end + + return true + end + + # A upper case version of `self' + redef fun to_upper: String + do + var outstr = calloc_string(self._length + 1) + var out_index = 0 + + var myitems = self._items + var index_from = self._index_from + var max = self._index_to + + while index_from <= max do + outstr[out_index] = myitems[index_from].to_upper + out_index += 1 + index_from += 1 + end + + outstr[self.length] = '\0' + + return new String.with_native(outstr, self._length) + end + + # A lower case version of `self' + redef fun to_lower : String + do + var outstr = calloc_string(self._length + 1) + var out_index = 0 + + var myitems = self._items + var index_from = self._index_from + var max = self._index_to + + while index_from <= max do + outstr[out_index] = myitems[index_from].to_lower + out_index += 1 + index_from += 1 + end + + outstr[self.length] = '\0' + + return new String.with_native(outstr, self._length) + end + + redef fun trim: String + do + if self._length == 0 then return self + # find position of the first non white space char (ascii < 32) from the start of the string + var start_pos = self._index_from + while _items[start_pos].ascii <= 32 do + start_pos += 1 + if start_pos == _index_to + 1 then return "" + end + # find position of the first non white space char from the end of the string + var end_pos = _index_to + while _items[end_pos].ascii <= 32 do + end_pos -= 1 + if end_pos == start_pos then return _items[start_pos].to_s + end + start_pos -= index_from + end_pos -= index_from + return self.substring(start_pos, end_pos - start_pos + 1) + end + + redef fun output + do + var i = self._index_from + var imax = self._index_to + while i <= imax do + _items[i].output + i += 1 + end + end + + ################################################## + # String Specific Methods # + ################################################## + + # Creates a String object as a substring of another String + # + # From : index to start at + # + # To : Index to stop at (from + count -1) + # + private init from_substring(from: Int, to: Int, internalString: NativeString) + do + _items = internalString + _index_from = from + _index_to = to + _length = to - from + 1 + end + # Create a new string from a given char *. init with_native(nat: NativeString, size: Int) do assert size >= 0 _items = nat _length = size + _index_from = 0 + _index_to = _length - 1 end # Create a new string from a null terminated char *. init from_cstring(str: NativeString) do - var size = str.cstring_length - _items = str - _length = size + with_native(str,str.cstring_length) + end + + # Creates a new Nit String from an existing CString + # Pretty much equals to from_cstring but copies instead + # of passing a reference + # Avoids manual/automatic dealloc problems when dealing with native C code + init copy_from_native(str: NativeString) + do + var temp_length = str.cstring_length + var new_str = calloc_string(temp_length + 1) + str.copy_to(new_str, temp_length, 0, 0) + new_str[temp_length] = '\0' + with_native(new_str, temp_length) end # Return a null terminated char * fun to_cstring: NativeString do + #return items + if _index_from > 0 or _index_to != items.cstring_length - 1 then + var newItems = calloc_string(_length + 1) + self.items.copy_to(newItems, _length, _index_from, 0) + newItems[length] = '\0' + return newItems + end return _items end - redef fun ==(o) + redef fun ==(other) do - if not o isa String or o is null then return false - var l = length - if o.length != l then return false - var i = 0 - var it = _items - var oit = o._items - while i < l do - if it[i] != oit[i] then return false - i += 1 + if not other isa String or other is null then return false + + if self.object_id == other.object_id then return true + + var my_length = _length + + if other._length != my_length then return false + + var my_index = _index_from + var its_index = other._index_from + + var last_iteration = my_index + my_length + + var itsitems = other._items + var myitems = self._items + + while my_index < last_iteration do + if myitems[my_index] != itsitems[its_index] then return false + my_index += 1 + its_index += 1 end + return true end - redef fun <(s) + # The comparison between two strings is done on a lexicographical basis + # Eg : "aa" < "b" => true + redef fun <(other) do - var i = 0 - var l1 = length - var l2 = s.length - var n1 = _items - var n2 = s._items - while i < l1 and i < l2 do - var c1 = n1[i].ascii - var c2 = n2[i].ascii - if c1 < c2 then - return true - else if c2 < c1 then + if self.object_id == other.object_id then return false + + var my_curr_char : Char + var its_curr_char : Char + + var curr_id_self = self._index_from + var curr_id_other = other._index_from + + var my_items = self._items + var its_items = other._items + + var my_length = self._length + var its_length = other._length + + var max_iterations = curr_id_self + my_length + + while curr_id_self < max_iterations do + my_curr_char = my_items[curr_id_self] + its_curr_char = its_items[curr_id_other] + + if my_curr_char != its_curr_char then + if my_curr_char < its_curr_char then return true return false end - i += 1 - end - if l1 < l2 then - return true - else - return false + + curr_id_self += 1 + curr_id_other += 1 end + + return my_length < its_length end # The concatenation of `self' with `r' fun +(s: String): String do - var r = new Buffer.with_capacity(length + s.length) - r.append(self) - r.append(s) - return r.to_s + var my_length = self._length + var its_length = s._length + + var target_string = calloc_string(my_length + its_length + 1) + + self._items.copy_to(target_string, my_length, _index_from, 0) + s._items.copy_to(target_string, its_length, s._index_from, my_length) + + target_string[my_length + its_length] = '\0' + + return new String.with_native(target_string, my_length + its_length) end # i repetitions of self fun *(i: Int): String do assert i >= 0 - var r = new Buffer.with_capacity(length * i) - while i > 0 do - r.append(self) - i -= 1 + + var my_length = self._length + + var final_length = my_length * i + + var my_items = self._items + + var target_string = calloc_string((final_length) + 1) + + target_string[final_length] = '\0' + + var current_last = 0 + + for iteration in [1 .. i] do + my_items.copy_to(target_string, my_length, 0, current_last) + current_last += my_length end - return r.to_s + + return new String.with_native(target_string, final_length) end redef fun to_s do return self @@ -256,17 +555,22 @@ class String # djb2 hash algorythm var h = 5381 var i = _length - 1 - var it = _items - while i >= 0 do - h = (h * 32) + h + it[i].ascii + + var myitems = _items + var strStart = _index_from + + i += strStart + + while i >= strStart do + h = (h * 32) + h + self._items[i].ascii i -= 1 end - return h + return h end end -# Strings are arrays of characters. +# Mutable strings of characters. class Buffer super AbstractString super Comparable @@ -307,8 +611,8 @@ class Buffer do if s isa String then var sl = s.length - if _capacity < length + sl then enlarge(length + sl) - s.items.copy_to(_items, sl, 0, length) + if _capacity < _length + sl then enlarge(_length + sl) + s.items.copy_to(_items, sl, s._index_from, _length) _length += sl else super @@ -360,7 +664,7 @@ class Buffer _capacity = s.length + 1 _length = s.length _items = calloc_string(_capacity) - s.items.copy_to(_items, _length, 0, 0) + s.items.copy_to(_items, _length, s._index_from, 0) end # Create a new empty string with a given capacity. @@ -396,8 +700,6 @@ end ############################################################################### redef class Object - # fun class_name: String is extern intern # The name of the class - # User readable representation of `self'. fun to_s: String do return inspect @@ -405,11 +707,13 @@ redef class Object private fun native_class_name: NativeString is intern # The class name of the object. - # FIXME: real type information is not available at runtime. Therefore, for instance, an instance of List[Bool] has just "List" for classname + # FIXME: real type information is not available at runtime. + # Therefore, for instance, an instance of List[Bool] has just + # "List" for class_name fun class_name: String do return new String.from_cstring(native_class_name) # Developer readable representation of `self'. - # Usualy, it uses the form "" + # Usually, it uses the form "" fun inspect: String do return "<{inspect_head}>" @@ -481,22 +785,58 @@ redef class Int end redef class Float - redef fun to_s do return to_precision(6) + # Pretty print self, print needed decimals up to a max of 6. + redef fun to_s do + var str = to_precision( 3 ) + var len = str.length + for i in [0..len-1] do + var j = len-1-i + var c = str[j] + if c == '0' then + continue + else if c == '.' then + return str.substring( 0, j+2 ) + else + return str.substring( 0, j+1 ) + end + end + return str + end # `self' representation with `nb' digits after the '.'. fun to_precision(nb: Int): String do - if nb == 0 then return to_i.to_s - - var i = to_i - var dec = 1.0 - while nb > 0 do - dec = dec * 10.0 - nb -= 1 + if nb == 0 then return self.to_i.to_s + var f = self + for i in [0..nb[ do f = f * 10.0 + if self > 0.0 then + f = f + 0.5 + else + f = f - 0.5 + end + var i = f.to_i + if i == 0 then return "0.0" + var s = i.to_s + var sl = s.length + if sl > nb then + var p1 = s.substring(0, s.length-nb) + var p2 = s.substring(s.length-nb, nb) + return p1 + "." + p2 + else + return "0." + ("0"*(nb-sl)) + s end - var d = ((self-i.to_f)*dec).to_i - return "{i}.{d}" end + + fun to_precision_native(nb: Int): String import String::from_cstring `{ + int size; + char *str; + + size = snprintf(NULL, 0, "%.*f", (int)nb, recv); + str = malloc(size + 1); + sprintf(str, "%.*f", (int)nb, recv ); + + return new_String_from_cstring( str ); + `} end redef class Char @@ -506,6 +846,30 @@ redef class Char s[0] = self return s.to_s end + + # Returns true if the char is a numerical digit + fun is_numeric: Bool + do + if self >= '0' and self <= '9' + then + return true + end + return false + end + + # Returns true if the char is an alpha digit + fun is_alpha: Bool + do + if (self >= 'a' and self <= 'z') or (self >= 'A' and self <= 'Z') then return true + return false + end + + # Returns true if the char is an alpha or a numeric digit + fun is_alphanumeric: Bool + do + if self.is_numeric or self.is_alpha then return true + return false + end end redef class Collection[E] @@ -558,7 +922,9 @@ redef class Array[E] end redef class Map[K,V] - # Concatenate couple of 'key value' separate by 'couple_sep' and separate each couple with `sep'. + # Concatenate couple of 'key value'. + # key and value are separated by 'couple_sep'. + # each couple is separated each couple with `sep'. fun join(sep: String, couple_sep: String): String do if is_empty then return "" @@ -602,10 +968,11 @@ class NativeString return l end fun atoi: Int is intern + fun atof: Float is extern "atof" end # StringCapable objects can create native strings -class StringCapable +interface StringCapable protected fun calloc_string(size: Int): NativeString is intern end @@ -637,8 +1004,10 @@ redef class Sys _args_cache = args end - private fun native_argc: Int is extern "kernel_Sys_Sys_native_argc_0" # First argument of the main C function. - - private fun native_argv(i: Int): NativeString is extern "kernel_Sys_Sys_native_argv_1" # Second argument of the main C function. + # First argument of the main C function. + private fun native_argc: Int is intern + + # Second argument of the main C function. + private fun native_argv(i: Int): NativeString is intern end