X-Git-Url: http://nitlanguage.org diff --git a/lib/standard/string.nit b/lib/standard/string.nit index e47399b..fcc1f1b 100644 --- a/lib/standard/string.nit +++ b/lib/standard/string.nit @@ -19,6 +19,7 @@ intrude import collection # FIXME should be collection::array `{ #include +#include `} ############################################################################### @@ -32,16 +33,18 @@ abstract class Text redef type OTHER: Text - # Type of the view on self (.chars) - type SELFVIEW: StringCharView - # Type of self (used for factorization of several methods, ex : substring_from, empty...) type SELFTYPE: Text # Gets a view on the chars of the Text object - fun chars: SELFVIEW is abstract + # + # assert "hello".chars.to_a == ['h', 'e', 'l', 'l', 'o'] + fun chars: SequenceRead[Char] is abstract # Number of characters contained in self. + # + # assert "12345".length == 5 + # assert "".length == 0 fun length: Int is abstract # Create a substring. @@ -56,19 +59,33 @@ abstract class Text # In this case, `from += count` and `count -= from`. fun substring(from: Int, count: Int): SELFTYPE is abstract + # Iterates on the substrings of self if any + fun substrings: Iterator[Text] is abstract + # Concatenates `o` to `self` + # + # assert "hello" + "world" == "helloworld" + # assert "" + "hello" + "" == "hello" fun +(o: Text): SELFTYPE is abstract # Auto-concatenates self `i` times + # + # assert "abc" * 4 == "abcabcabcabc" + # assert "abc" * 1 == "abc" + # assert "abc" * 0 == "" fun *(i: Int): SELFTYPE is abstract # Is the current Text empty (== "") - # assert "".is_empty - # assert not "foo".is_empty + # + # assert "".is_empty + # assert not "foo".is_empty fun is_empty: Bool do return self.length == 0 # Returns an empty Text of the right type - fun empty: SELFTYPE is abstract + # + # This method is used internally to get the right + # implementation of an empty string. + protected fun empty: SELFTYPE is abstract # Gets the first char of the Text # @@ -107,6 +124,7 @@ abstract class Text var iter = self.chars.iterator_from(pos) while iter.is_ok do if iter.item == c then return iter.index + iter.next end return -1 end @@ -121,10 +139,14 @@ abstract class Text return last_index_of_from(c, length - 1) end + # Return a null terminated char * + fun to_cstring: NativeString do return flatten.to_cstring + # The index of the last occurrence of an element starting from pos (in reverse order). - # Example : - # assert "/etc/bin/test/test.nit".last_index_of_from('/', length-1) == 13 - # assert "/etc/bin/test/test.nit".last_index_of_from('/', 12) == 8 + # + # var s = "/etc/bin/test/test.nit" + # assert s.last_index_of_from('/', s.length-1) == 13 + # assert s.last_index_of_from('/', 12) == 8 # # Returns -1 if not found # @@ -174,6 +196,13 @@ abstract class Text return substring(from, length - from) end + # Returns a reversed version of self + # + # assert "hello".reversed == "olleh" + # assert "bob".reversed == "bob" + # assert "".reversed == "" + fun reversed: SELFTYPE is abstract + # Does self have a substring `str` starting from position `pos`? # # assert "abcd".has_substring("bc",1) == true @@ -181,7 +210,7 @@ abstract class Text fun has_substring(str: String, pos: Int): Bool do var myiter = self.chars.iterator_from(pos) - var itsiter = str.iterator + var itsiter = str.chars.iterator while myiter.is_ok and itsiter.is_ok do if myiter.item != itsiter.item then return false myiter.next @@ -226,6 +255,8 @@ abstract class Text end # If `self` contains only digits and alpha <= 'f', return the corresponding integer. + # + # assert "ff".to_hex == 255 fun to_hex: Int do return a_to(16) # If `self` contains only digits and letters, return the corresponding integer in a given base @@ -293,6 +324,10 @@ abstract class Text fun to_lower : SELFTYPE is abstract # Removes the whitespaces at the beginning of self + # + # assert " \n\thello \n\t".l_trim == "hello \n\t" + # + # A whitespace is defined as any character which ascii value is less than or equal to 32 fun l_trim: SELFTYPE do var iter = self.chars.iterator @@ -305,6 +340,10 @@ abstract class Text end # Removes the whitespaces at the end of self + # + # assert " \n\thello \n\t".r_trim == " \n\thello" + # + # A whitespace is defined as any character which ascii value is less than or equal to 32 fun r_trim: SELFTYPE do var iter = self.chars.reverse_iterator @@ -388,7 +427,7 @@ abstract class Text fun escape_more_to_c(chars: String): String do var b = new FlatBuffer - for c in escape_to_c do + for c in escape_to_c.chars do if chars.chars.has(c) then b.add('\\') end @@ -397,24 +436,23 @@ abstract class Text return b.to_s end - # Escape to c plus braces + # Escape to C plus braces # # assert "\n\"'\\\{\}".escape_to_nit == "\\n\\\"\\'\\\\\\\{\\\}" fun escape_to_nit: String do return escape_more_to_c("\{\}") # Return a string where Nit escape sequences are transformed. # - # Example: # var s = "\\n" # assert s.length == 2 # var u = s.unescape_nit # assert u.length == 1 - # assert u[0].ascii == 10 # (the ASCII value of the "new line" character) + # assert u.chars[0].ascii == 10 # (the ASCII value of the "new line" character) fun unescape_nit: String do var res = new FlatBuffer.with_capacity(self.length) var was_slash = false - for c in self do + for c in chars do if not was_slash then if c == '\\' then was_slash = true @@ -439,6 +477,22 @@ abstract class Text return res.to_s end + # Equality of text + # Two pieces of text are equals if thez have the same characters in the same order. + # + # assert "hello" == "hello" + # assert "hello" != "HELLO" + # assert "hello" == "hel"+"lo" + # + # Things that are not Text are not equal. + # + # assert "9" != '9' + # assert "9" != ['9'] + # assert "9" != 9 + # + # assert "9".chars.first == '9' # equality of Char + # assert "9".chars == ['9'] # equality of Sequence + # assert "9".to_i == 9 # equality of Int redef fun ==(o) do if o == null then return false @@ -448,14 +502,49 @@ abstract class Text return self.chars == o.chars end - redef fun <(o) + # Lexicographical comparaison + # + # assert "abc" < "xy" + # assert "ABC" < "abc" + redef fun <(other) do - return self.chars < o.chars + var self_chars = self.chars.iterator + var other_chars = other.chars.iterator + + while self_chars.is_ok and other_chars.is_ok do + if self_chars.item < other_chars.item then return true + if self_chars.item > other_chars.item then return false + self_chars.next + other_chars.next + end + + if self_chars.is_ok then + return false + else + return true + end end # Flat representation of self fun flatten: FlatText is abstract + private var hash_cache: nullable Int = null + + redef fun hash + do + if hash_cache == null then + # djb2 hash algorithm + var h = 5381 + + for char in self.chars do + h = h.lshift(5) + h + char.ascii + end + + hash_cache = h + end + return hash_cache.as(not null) + end + end # All kinds of array-based text representations. @@ -464,7 +553,10 @@ abstract class FlatText private var items: NativeString - redef var length: Int + # Real items, used as cache for to_cstring is called + private var real_items: nullable NativeString = null + + redef var length: Int = 0 init do end @@ -482,14 +574,11 @@ end # Abstract class for the SequenceRead compatible # views on String and Buffer objects -abstract class StringCharView +private abstract class StringCharView super SequenceRead[Char] - super Comparable type SELFTYPE: Text - redef type OTHER: StringCharView - private var target: SELFTYPE private init(tgt: SELFTYPE) @@ -503,85 +592,12 @@ abstract class StringCharView redef fun iterator: IndexedIterator[Char] do return self.iterator_from(0) - # Gets a new Iterator starting at position `pos` - # - # Ex : - # var iter = "abcd".iterator_from(2) - # while iter.is_ok do - # printn iter.item - # iter.next - # end - # - # Outputs : cd - fun iterator_from(pos: Int): IndexedIterator[Char] is abstract - - # Gets an iterator starting at the end and going backwards - # - # Ex : - # var reviter = "now step live...".reverse_iterator - # while reviter.is_ok do - # printn reviter.item - # reviter.next - # end - # - # Outputs : ...evil pets won - fun reverse_iterator: IndexedIterator[Char] do return self.reverse_iterator_from(self.length - 1) - - # Gets an iterator on the chars of self starting from `pos` - # - # Ex : - # var iter = "abcd".reverse_iterator_from(1) - # while iter.is_ok do - # printn iter.item - # iter.next - # end - # - # Outputs : ba - fun reverse_iterator_from(pos: Int): IndexedIterator[Char] is abstract - - redef fun has(c: Char): Bool - do - for i in self do - if i == c then return true - end - return false - end - - redef fun ==(other) - do - if other == null then return false - if not other isa StringCharView then return false - var other_chars = other.iterator - for i in self do - if i != other_chars.item then return false - other_chars.next - end - return true - end - - redef fun <(other) - do - var self_chars = self.iterator - var other_chars = other.iterator - - while self_chars.is_ok and other_chars.is_ok do - if self_chars.item < other_chars.item then return true - if self_chars.item > other_chars.item then return false - self_chars.next - other_chars.next - end - - if self_chars.is_ok then - return false - else - return true - end - end + redef fun reverse_iterator do return self.reverse_iterator_from(self.length - 1) end # View on Buffer objects, extends Sequence # for mutation operations -abstract class BufferCharView +private abstract class BufferCharView super StringCharView super Sequence[Char] @@ -596,6 +612,28 @@ abstract class String redef fun to_s do return self + fun append(s: String): SELFTYPE is abstract + + fun prepend(s: String): SELFTYPE is abstract + + fun insert_at(s: String, pos: Int): SELFTYPE is abstract +end + +private class FlatSubstringsIter + super Iterator[FlatText] + + var tgt: nullable FlatText + + init(tgt: FlatText) do self.tgt = tgt + + redef fun item do + assert is_ok + return tgt.as(not null) + end + + redef fun is_ok do return tgt != null + + redef fun next do tgt = null end # Immutable strings of characters. @@ -603,27 +641,31 @@ class FlatString super FlatText super String - redef type SELFTYPE: FlatString - redef type SELFVIEW: FlatStringCharView - # Index in _items of the start of the string private var index_from: Int # Indes in _items of the last item of the string private var index_to: Int - redef var chars: SELFVIEW = new FlatStringCharView(self) + redef var chars: SequenceRead[Char] = new FlatStringCharView(self) ################################################ # AbstractString specific methods # ################################################ - redef fun [](index) do - assert index >= 0 - # Check that the index (+ index_from) is not larger than indexTo - # In other terms, if the index is valid - assert (index + index_from) <= index_to - return items[index + index_from] + redef fun reversed + do + var native = calloc_string(self.length + 1) + var length = self.length + var items = self.items + var pos = 0 + var ipos = length-1 + while pos < length do + native[pos] = items[ipos] + pos += 1 + ipos -= 1 + end + return native.to_s_with_length(self.length) end redef fun substring(from, count) @@ -711,13 +753,14 @@ class FlatString index_to = to end - # Return a null terminated char * - fun to_cstring: NativeString + redef fun to_cstring: NativeString do + if real_items != null then return real_items.as(not null) if index_from > 0 or index_to != items.cstring_length - 1 then var newItems = calloc_string(length + 1) self.items.copy_to(newItems, length, index_from, 0) newItems[length] = '\0' + self.real_items = newItems return newItems end return items @@ -750,9 +793,6 @@ class FlatString return true end - # The comparison between two strings is done on a lexicographical basis - # - # assert ("aa" < "b") == true redef fun <(other) do if not other isa FlatString then return super @@ -789,9 +829,6 @@ class FlatString return my_length < its_length end - # The concatenation of `self` with `s` - # - # assert "hello " + "world!" == "hello world!" redef fun +(s) do var my_length = self.length @@ -819,9 +856,6 @@ class FlatString return target_string.to_s_with_length(total_length) end - # assert "abc"*3 == "abcabcabc" - # assert "abc"*1 == "abc" - # assert "abc"*0 == "" redef fun *(i) do assert i >= 0 @@ -848,22 +882,25 @@ class FlatString redef fun hash do - # djb2 hash algorythm - var h = 5381 - var i = length - 1 + if hash_cache == null then + # djb2 hash algorithm + var h = 5381 + var i = index_from - var myitems = items - var strStart = index_from + var myitems = items - i += strStart + while i <= index_to do + h = h.lshift(5) + h + myitems[i].ascii + i += 1 + end - while i >= strStart do - h = (h * 32) + h + self.items[i].ascii - i -= 1 + hash_cache = h end - return h + return hash_cache.as(not null) end + + redef fun substrings do return new FlatSubstringsIter(self) end private class FlatStringReverseIterator @@ -928,6 +965,7 @@ private class FlatStringCharView # Check that the index (+ index_from) is not larger than indexTo # In other terms, if the index is valid assert index >= 0 + var target = self.target assert (index + target.index_from) <= target.index_to return target.items[index + target.index_from] end @@ -941,9 +979,11 @@ end abstract class Buffer super Text - redef type SELFVIEW: BufferCharView redef type SELFTYPE: Buffer + # Specific implementations MUST set this to `true` in order to invalidate caches + protected var is_dirty = true + # Modifies the char contained at pos `index` # # DEPRECATED : Use self.chars.[]= instead @@ -955,14 +995,34 @@ abstract class Buffer fun add(c: Char) is abstract # Clears the buffer + # + # var b = new FlatBuffer + # b.append "hello" + # assert not b.is_empty + # b.clear + # assert b.is_empty fun clear is abstract # Enlarges the subsequent array containing the chars of self fun enlarge(cap: Int) is abstract # Adds the content of text `s` at the end of self + # + # var b = new FlatBuffer + # b.append "hello" + # b.append "world" + # assert b == "helloworld" fun append(s: Text) is abstract + redef fun hash + do + if is_dirty then hash_cache = null + return super + end + + # In Buffers, the internal sequence of character is mutable + # Thus, `chars` can be used to modify the buffer. + redef fun chars: Sequence[Char] is abstract end # Mutable strings of characters. @@ -970,15 +1030,17 @@ class FlatBuffer super FlatText super Buffer - redef type SELFVIEW: FlatBufferCharView redef type SELFTYPE: FlatBuffer - redef var chars: SELFVIEW = new FlatBufferCharView(self) + redef var chars: Sequence[Char] = new FlatBufferCharView(self) - var capacity: Int + private var capacity: Int = 0 + + redef fun substrings do return new FlatSubstringsIter(self) redef fun []=(index, item) do + is_dirty = true if index == length then add(item) return @@ -989,12 +1051,16 @@ class FlatBuffer redef fun add(c) do + is_dirty = true if capacity <= length then enlarge(length + 5) items[length] = c length += 1 end - redef fun clear do length = 0 + redef fun clear do + is_dirty = true + length = 0 + end redef fun empty do return new FlatBuffer @@ -1004,30 +1070,31 @@ class FlatBuffer if cap <= c then return while c <= cap do c = c * 2 + 2 var a = calloc_string(c+1) - items.copy_to(a, length, 0, 0) + if length > 0 then items.copy_to(a, length, 0, 0) items = a capacity = c - items.copy_to(a, length, 0, 0) end redef fun to_s: String do - var l = length - var a = calloc_string(l+1) - items.copy_to(a, l, 0, 0) - - # Ensure the afterlast byte is '\0' to nul-terminated char * - a[length] = '\0' - - return a.to_s_with_length(length) + return to_cstring.to_s_with_length(length) end - # Create a new empty string. - init + redef fun to_cstring do - with_capacity(5) + if is_dirty then + var new_native = calloc_string(length + 1) + new_native[length] = '\0' + if length > 0 then items.copy_to(new_native, length, 0, 0) + real_items = new_native + is_dirty = false + end + return real_items.as(not null) end + # Create a new empty string. + init do end + init from(s: Text) do capacity = s.length + 1 @@ -1058,6 +1125,8 @@ class FlatBuffer redef fun append(s) do + if s.is_empty then return + is_dirty = true var sl = s.length if capacity < length + sl then enlarge(length + sl) if s isa FlatString then @@ -1102,6 +1171,17 @@ class FlatBuffer end end + redef fun reversed + do + var new_buf = new FlatBuffer.with_capacity(self.length) + var reviter = self.chars.reverse_iterator + while reviter.is_ok do + new_buf.add(reviter.item) + reviter.next + end + return new_buf + end + redef fun +(other) do var new_buf = new FlatBuffer.with_capacity(self.length + other.length) @@ -1118,6 +1198,24 @@ class FlatBuffer end return new_buf end + + redef fun to_upper + do + var new_buf = new FlatBuffer.with_capacity(self.length) + for i in self.chars do + new_buf.add(i.to_upper) + end + return new_buf + end + + redef fun to_lower + do + var new_buf = new FlatBuffer.with_capacity(self.length) + for i in self.chars do + new_buf.add(i.to_lower) + end + return new_buf + end end private class FlatBufferReverseIterator @@ -1132,7 +1230,7 @@ private class FlatBufferReverseIterator init with_pos(tgt: FlatBuffer, pos: Int) do target = tgt - target_items = tgt.items + if tgt.length > 0 then target_items = tgt.items curr_pos = pos end @@ -1204,7 +1302,7 @@ private class FlatBufferIterator init with_pos(tgt: FlatBuffer, pos: Int) do target = tgt - target_items = tgt.items + if tgt.length > 0 then target_items = tgt.items curr_pos = pos end @@ -1268,9 +1366,18 @@ redef class Bool end redef class Int + + # Wrapper of strerror C function + private fun strerror_ext: NativeString is extern `{ + return strerror(recv); + `} + + # Returns a string describing error number + fun strerror: String do return strerror_ext.to_s + # Fill `s` with the digits in base `base` of `self` (and with the '-' sign if 'signed' and negative). # assume < to_c max const of char - fun fill_buffer(s: Buffer, base: Int, signed: Bool) + private fun fill_buffer(s: Buffer, base: Int, signed: Bool) do var n: Int # Sign @@ -1304,7 +1411,10 @@ redef class Int return native_int_to_s(len).to_s_with_length(len) end - # return displayable int in hexadecimal (unsigned (not now)) + # return displayable int in hexadecimal + # + # assert 1.to_hex == "1" + # assert (-255).to_hex == "-ff" fun to_hex: String do return to_base(16,false) # return displayable int in base base and signed @@ -1319,6 +1429,11 @@ end redef class Float # Pretty print self, print needoed decimals up to a max of 3. + # + # assert 12.34.to_s == "12.34" + # assert (-0120.03450).to_s == "-120.035" + # + # see `to_precision` for a different precision. redef fun to_s do var str = to_precision( 3 ) if is_inf != 0 or is_nan then return str @@ -1338,6 +1453,11 @@ redef class Float end # `self` representation with `nb` digits after the '.'. + # + # assert 12.345.to_precision(1) == "12.3" + # assert 12.345.to_precision(2) == "12.35" + # assert 12.345.to_precision(3) == "12.345" + # assert 12.345.to_precision(4) == "12.3450" fun to_precision(nb: Int): String do if is_nan then return "nan" @@ -1370,6 +1490,12 @@ redef class Float end end + # `self` representation with `nb` digits after the '.'. + # + # assert 12.345.to_precision_native(1) == "12.3" + # assert 12.345.to_precision_native(2) == "12.35" + # assert 12.345.to_precision_native(3) == "12.345" + # assert 12.345.to_precision_native(4) == "12.3450" fun to_precision_native(nb: Int): String import NativeString.to_s `{ int size; char *str; @@ -1392,27 +1518,37 @@ redef class Char end # Returns true if the char is a numerical digit + # + # assert '0'.is_numeric + # assert '9'.is_numeric + # assert not 'a'.is_numeric + # assert not '?'.is_numeric fun is_numeric: Bool do - if self >= '0' and self <= '9' - then - return true - end - return false + return self >= '0' and self <= '9' end # Returns true if the char is an alpha digit + # + # assert 'a'.is_alpha + # assert 'Z'.is_alpha + # assert not '0'.is_alpha + # assert not '?'.is_alpha fun is_alpha: Bool do - if (self >= 'a' and self <= 'z') or (self >= 'A' and self <= 'Z') then return true - return false + return (self >= 'a' and self <= 'z') or (self >= 'A' and self <= 'Z') end # Returns true if the char is an alpha or a numeric digit + # + # assert 'a'.is_alphanumeric + # assert 'Z'.is_alphanumeric + # assert '0'.is_alphanumeric + # assert '9'.is_alphanumeric + # assert not '?'.is_alphanumeric fun is_alphanumeric: Bool do - if self.is_numeric or self.is_alpha then return true - return false + return self.is_numeric or self.is_alpha end end @@ -1429,7 +1565,7 @@ redef class Collection[E] # # assert [1, 2, 3].join(":") == "1:2:3" # assert [1..3].join(":") == "1:2:3" - fun join(sep: String): String + fun join(sep: Text): String do if is_empty then return "" @@ -1585,3 +1721,47 @@ redef class Sys private fun native_argv(i: Int): NativeString is intern end +# Comparator that efficienlty use `to_s` to compare things +# +# The comparaison call `to_s` on object and use the result to order things. +# +# var a = [1, 2, 3, 10, 20] +# (new CachedAlphaComparator).sort(a) +# assert a == [1, 10, 2, 20, 3] +# +# Internally the result of `to_s` is cached in a HashMap to counter +# uneficient implementation of `to_s`. +# +# Note: it caching is not usefull, see `alpha_comparator` +class CachedAlphaComparator + super Comparator[Object] + + private var cache = new HashMap[Object, String] + + private fun do_to_s(a: Object): String do + if cache.has_key(a) then return cache[a] + var res = a.to_s + cache[a] = res + return res + end + + redef fun compare(a, b) do + return do_to_s(a) <=> do_to_s(b) + end +end + +# see `alpha_comparator` +private class AlphaComparator + super Comparator[Object] + redef fun compare(a, b) do return a.to_s <=> b.to_s +end + +# Stateless comparator that naively use `to_s` to compare things. +# +# Note: the result of `to_s` is not cached, thus can be invoked a lot +# on a single instace. See `CachedAlphaComparator` as an alternative. +# +# var a = [1, 2, 3, 10, 20] +# alpha_comparator.sort(a) +# assert a == [1, 10, 2, 20, 3] +fun alpha_comparator: Comparator[Object] do return once new AlphaComparator