X-Git-Url: http://nitlanguage.org diff --git a/lib/standard/string.nit b/lib/standard/string.nit index aa3cef5..fcc1f1b 100644 --- a/lib/standard/string.nit +++ b/lib/standard/string.nit @@ -19,6 +19,7 @@ intrude import collection # FIXME should be collection::array `{ #include +#include `} ############################################################################### @@ -32,18 +33,18 @@ abstract class Text redef type OTHER: Text - # Type of the view on self (.chars) - type SELFVIEW: StringCharView - # Type of self (used for factorization of several methods, ex : substring_from, empty...) type SELFTYPE: Text - var hash_cache: nullable Int = null - # Gets a view on the chars of the Text object - fun chars: SELFVIEW is abstract + # + # assert "hello".chars.to_a == ['h', 'e', 'l', 'l', 'o'] + fun chars: SequenceRead[Char] is abstract # Number of characters contained in self. + # + # assert "12345".length == 5 + # assert "".length == 0 fun length: Int is abstract # Create a substring. @@ -58,19 +59,33 @@ abstract class Text # In this case, `from += count` and `count -= from`. fun substring(from: Int, count: Int): SELFTYPE is abstract + # Iterates on the substrings of self if any + fun substrings: Iterator[Text] is abstract + # Concatenates `o` to `self` + # + # assert "hello" + "world" == "helloworld" + # assert "" + "hello" + "" == "hello" fun +(o: Text): SELFTYPE is abstract # Auto-concatenates self `i` times + # + # assert "abc" * 4 == "abcabcabcabc" + # assert "abc" * 1 == "abc" + # assert "abc" * 0 == "" fun *(i: Int): SELFTYPE is abstract # Is the current Text empty (== "") - # assert "".is_empty - # assert not "foo".is_empty + # + # assert "".is_empty + # assert not "foo".is_empty fun is_empty: Bool do return self.length == 0 # Returns an empty Text of the right type - fun empty: SELFTYPE is abstract + # + # This method is used internally to get the right + # implementation of an empty string. + protected fun empty: SELFTYPE is abstract # Gets the first char of the Text # @@ -109,6 +124,7 @@ abstract class Text var iter = self.chars.iterator_from(pos) while iter.is_ok do if iter.item == c then return iter.index + iter.next end return -1 end @@ -127,9 +143,10 @@ abstract class Text fun to_cstring: NativeString do return flatten.to_cstring # The index of the last occurrence of an element starting from pos (in reverse order). - # Example : - # assert "/etc/bin/test/test.nit".last_index_of_from('/', length-1) == 13 - # assert "/etc/bin/test/test.nit".last_index_of_from('/', 12) == 8 + # + # var s = "/etc/bin/test/test.nit" + # assert s.last_index_of_from('/', s.length-1) == 13 + # assert s.last_index_of_from('/', 12) == 8 # # Returns -1 if not found # @@ -180,6 +197,10 @@ abstract class Text end # Returns a reversed version of self + # + # assert "hello".reversed == "olleh" + # assert "bob".reversed == "bob" + # assert "".reversed == "" fun reversed: SELFTYPE is abstract # Does self have a substring `str` starting from position `pos`? @@ -189,7 +210,7 @@ abstract class Text fun has_substring(str: String, pos: Int): Bool do var myiter = self.chars.iterator_from(pos) - var itsiter = str.iterator + var itsiter = str.chars.iterator while myiter.is_ok and itsiter.is_ok do if myiter.item != itsiter.item then return false myiter.next @@ -234,6 +255,8 @@ abstract class Text end # If `self` contains only digits and alpha <= 'f', return the corresponding integer. + # + # assert "ff".to_hex == 255 fun to_hex: Int do return a_to(16) # If `self` contains only digits and letters, return the corresponding integer in a given base @@ -301,6 +324,10 @@ abstract class Text fun to_lower : SELFTYPE is abstract # Removes the whitespaces at the beginning of self + # + # assert " \n\thello \n\t".l_trim == "hello \n\t" + # + # A whitespace is defined as any character which ascii value is less than or equal to 32 fun l_trim: SELFTYPE do var iter = self.chars.iterator @@ -313,6 +340,10 @@ abstract class Text end # Removes the whitespaces at the end of self + # + # assert " \n\thello \n\t".r_trim == " \n\thello" + # + # A whitespace is defined as any character which ascii value is less than or equal to 32 fun r_trim: SELFTYPE do var iter = self.chars.reverse_iterator @@ -396,7 +427,7 @@ abstract class Text fun escape_more_to_c(chars: String): String do var b = new FlatBuffer - for c in escape_to_c do + for c in escape_to_c.chars do if chars.chars.has(c) then b.add('\\') end @@ -405,24 +436,23 @@ abstract class Text return b.to_s end - # Escape to c plus braces + # Escape to C plus braces # # assert "\n\"'\\\{\}".escape_to_nit == "\\n\\\"\\'\\\\\\\{\\\}" fun escape_to_nit: String do return escape_more_to_c("\{\}") # Return a string where Nit escape sequences are transformed. # - # Example: # var s = "\\n" # assert s.length == 2 # var u = s.unescape_nit # assert u.length == 1 - # assert u[0].ascii == 10 # (the ASCII value of the "new line" character) + # assert u.chars[0].ascii == 10 # (the ASCII value of the "new line" character) fun unescape_nit: String do var res = new FlatBuffer.with_capacity(self.length) var was_slash = false - for c in self do + for c in chars do if not was_slash then if c == '\\' then was_slash = true @@ -447,6 +477,22 @@ abstract class Text return res.to_s end + # Equality of text + # Two pieces of text are equals if thez have the same characters in the same order. + # + # assert "hello" == "hello" + # assert "hello" != "HELLO" + # assert "hello" == "hel"+"lo" + # + # Things that are not Text are not equal. + # + # assert "9" != '9' + # assert "9" != ['9'] + # assert "9" != 9 + # + # assert "9".chars.first == '9' # equality of Char + # assert "9".chars == ['9'] # equality of Sequence + # assert "9".to_i == 9 # equality of Int redef fun ==(o) do if o == null then return false @@ -456,24 +502,42 @@ abstract class Text return self.chars == o.chars end - redef fun <(o) + # Lexicographical comparaison + # + # assert "abc" < "xy" + # assert "ABC" < "abc" + redef fun <(other) do - return self.chars < o.chars + var self_chars = self.chars.iterator + var other_chars = other.chars.iterator + + while self_chars.is_ok and other_chars.is_ok do + if self_chars.item < other_chars.item then return true + if self_chars.item > other_chars.item then return false + self_chars.next + other_chars.next + end + + if self_chars.is_ok then + return false + else + return true + end end # Flat representation of self fun flatten: FlatText is abstract + private var hash_cache: nullable Int = null + redef fun hash do if hash_cache == null then # djb2 hash algorithm var h = 5381 - var i = length - 1 for char in self.chars do - h = (h * 32) + h + char.ascii - i -= 1 + h = h.lshift(5) + h + char.ascii end hash_cache = h @@ -492,7 +556,7 @@ abstract class FlatText # Real items, used as cache for to_cstring is called private var real_items: nullable NativeString = null - redef var length: Int + redef var length: Int = 0 init do end @@ -510,14 +574,11 @@ end # Abstract class for the SequenceRead compatible # views on String and Buffer objects -abstract class StringCharView +private abstract class StringCharView super SequenceRead[Char] - super Comparable type SELFTYPE: Text - redef type OTHER: StringCharView - private var target: SELFTYPE private init(tgt: SELFTYPE) @@ -531,85 +592,12 @@ abstract class StringCharView redef fun iterator: IndexedIterator[Char] do return self.iterator_from(0) - # Gets a new Iterator starting at position `pos` - # - # Ex : - # var iter = "abcd".iterator_from(2) - # while iter.is_ok do - # printn iter.item - # iter.next - # end - # - # Outputs : cd - fun iterator_from(pos: Int): IndexedIterator[Char] is abstract - - # Gets an iterator starting at the end and going backwards - # - # Ex : - # var reviter = "now step live...".reverse_iterator - # while reviter.is_ok do - # printn reviter.item - # reviter.next - # end - # - # Outputs : ...evil pets won - fun reverse_iterator: IndexedIterator[Char] do return self.reverse_iterator_from(self.length - 1) - - # Gets an iterator on the chars of self starting from `pos` - # - # Ex : - # var iter = "abcd".reverse_iterator_from(1) - # while iter.is_ok do - # printn iter.item - # iter.next - # end - # - # Outputs : ba - fun reverse_iterator_from(pos: Int): IndexedIterator[Char] is abstract - - redef fun has(c: Char): Bool - do - for i in self do - if i == c then return true - end - return false - end - - redef fun ==(other) - do - if other == null then return false - if not other isa StringCharView then return false - var other_chars = other.iterator - for i in self do - if i != other_chars.item then return false - other_chars.next - end - return true - end - - redef fun <(other) - do - var self_chars = self.iterator - var other_chars = other.iterator - - while self_chars.is_ok and other_chars.is_ok do - if self_chars.item < other_chars.item then return true - if self_chars.item > other_chars.item then return false - self_chars.next - other_chars.next - end - - if self_chars.is_ok then - return false - else - return true - end - end + redef fun reverse_iterator do return self.reverse_iterator_from(self.length - 1) end # View on Buffer objects, extends Sequence # for mutation operations -abstract class BufferCharView +private abstract class BufferCharView super StringCharView super Sequence[Char] @@ -624,6 +612,28 @@ abstract class String redef fun to_s do return self + fun append(s: String): SELFTYPE is abstract + + fun prepend(s: String): SELFTYPE is abstract + + fun insert_at(s: String, pos: Int): SELFTYPE is abstract +end + +private class FlatSubstringsIter + super Iterator[FlatText] + + var tgt: nullable FlatText + + init(tgt: FlatText) do self.tgt = tgt + + redef fun item do + assert is_ok + return tgt.as(not null) + end + + redef fun is_ok do return tgt != null + + redef fun next do tgt = null end # Immutable strings of characters. @@ -631,38 +641,29 @@ class FlatString super FlatText super String - redef type SELFTYPE: FlatString - redef type SELFVIEW: FlatStringCharView - # Index in _items of the start of the string private var index_from: Int # Indes in _items of the last item of the string private var index_to: Int - redef var chars: SELFVIEW = new FlatStringCharView(self) + redef var chars: SequenceRead[Char] = new FlatStringCharView(self) ################################################ # AbstractString specific methods # ################################################ - redef fun [](index) do - assert index >= 0 - # Check that the index (+ index_from) is not larger than indexTo - # In other terms, if the index is valid - assert (index + index_from) <= index_to - return items[index + index_from] - end - redef fun reversed do var native = calloc_string(self.length + 1) - var reviter = chars.reverse_iterator + var length = self.length + var items = self.items var pos = 0 - while reviter.is_ok do - native[pos] = reviter.item + var ipos = length-1 + while pos < length do + native[pos] = items[ipos] pos += 1 - reviter.next + ipos -= 1 end return native.to_s_with_length(self.length) end @@ -752,7 +753,6 @@ class FlatString index_to = to end - # Return a null terminated char * redef fun to_cstring: NativeString do if real_items != null then return real_items.as(not null) @@ -793,9 +793,6 @@ class FlatString return true end - # The comparison between two strings is done on a lexicographical basis - # - # assert ("aa" < "b") == true redef fun <(other) do if not other isa FlatString then return super @@ -832,9 +829,6 @@ class FlatString return my_length < its_length end - # The concatenation of `self` with `s` - # - # assert "hello " + "world!" == "hello world!" redef fun +(s) do var my_length = self.length @@ -862,9 +856,6 @@ class FlatString return target_string.to_s_with_length(total_length) end - # assert "abc"*3 == "abcabcabc" - # assert "abc"*1 == "abc" - # assert "abc"*0 == "" redef fun *(i) do assert i >= 0 @@ -892,18 +883,15 @@ class FlatString redef fun hash do if hash_cache == null then - # djb2 hash algorythm + # djb2 hash algorithm var h = 5381 - var i = length - 1 + var i = index_from var myitems = items - var strStart = index_from - - i += strStart - while i >= strStart do - h = (h * 32) + h + self.items[i].ascii - i -= 1 + while i <= index_to do + h = h.lshift(5) + h + myitems[i].ascii + i += 1 end hash_cache = h @@ -911,6 +899,8 @@ class FlatString return hash_cache.as(not null) end + + redef fun substrings do return new FlatSubstringsIter(self) end private class FlatStringReverseIterator @@ -975,6 +965,7 @@ private class FlatStringCharView # Check that the index (+ index_from) is not larger than indexTo # In other terms, if the index is valid assert index >= 0 + var target = self.target assert (index + target.index_from) <= target.index_to return target.items[index + target.index_from] end @@ -988,10 +979,10 @@ end abstract class Buffer super Text - redef type SELFVIEW: BufferCharView redef type SELFTYPE: Buffer - var is_dirty = true + # Specific implementations MUST set this to `true` in order to invalidate caches + protected var is_dirty = true # Modifies the char contained at pos `index` # @@ -1004,12 +995,23 @@ abstract class Buffer fun add(c: Char) is abstract # Clears the buffer + # + # var b = new FlatBuffer + # b.append "hello" + # assert not b.is_empty + # b.clear + # assert b.is_empty fun clear is abstract # Enlarges the subsequent array containing the chars of self fun enlarge(cap: Int) is abstract # Adds the content of text `s` at the end of self + # + # var b = new FlatBuffer + # b.append "hello" + # b.append "world" + # assert b == "helloworld" fun append(s: Text) is abstract redef fun hash @@ -1018,6 +1020,9 @@ abstract class Buffer return super end + # In Buffers, the internal sequence of character is mutable + # Thus, `chars` can be used to modify the buffer. + redef fun chars: Sequence[Char] is abstract end # Mutable strings of characters. @@ -1025,12 +1030,13 @@ class FlatBuffer super FlatText super Buffer - redef type SELFVIEW: FlatBufferCharView redef type SELFTYPE: FlatBuffer - redef var chars: SELFVIEW = new FlatBufferCharView(self) + redef var chars: Sequence[Char] = new FlatBufferCharView(self) - var capacity: Int + private var capacity: Int = 0 + + redef fun substrings do return new FlatSubstringsIter(self) redef fun []=(index, item) do @@ -1060,15 +1066,13 @@ class FlatBuffer redef fun enlarge(cap) do - is_dirty = true var c = capacity if cap <= c then return while c <= cap do c = c * 2 + 2 var a = calloc_string(c+1) - items.copy_to(a, length, 0, 0) + if length > 0 then items.copy_to(a, length, 0, 0) items = a capacity = c - items.copy_to(a, length, 0, 0) end redef fun to_s: String @@ -1081,7 +1085,7 @@ class FlatBuffer if is_dirty then var new_native = calloc_string(length + 1) new_native[length] = '\0' - items.copy_to(new_native, length, 0, 0) + if length > 0 then items.copy_to(new_native, length, 0, 0) real_items = new_native is_dirty = false end @@ -1089,7 +1093,7 @@ class FlatBuffer end # Create a new empty string. - init do with_capacity(5) + init do end init from(s: Text) do @@ -1121,6 +1125,7 @@ class FlatBuffer redef fun append(s) do + if s.is_empty then return is_dirty = true var sl = s.length if capacity < length + sl then enlarge(length + sl) @@ -1193,6 +1198,24 @@ class FlatBuffer end return new_buf end + + redef fun to_upper + do + var new_buf = new FlatBuffer.with_capacity(self.length) + for i in self.chars do + new_buf.add(i.to_upper) + end + return new_buf + end + + redef fun to_lower + do + var new_buf = new FlatBuffer.with_capacity(self.length) + for i in self.chars do + new_buf.add(i.to_lower) + end + return new_buf + end end private class FlatBufferReverseIterator @@ -1207,7 +1230,7 @@ private class FlatBufferReverseIterator init with_pos(tgt: FlatBuffer, pos: Int) do target = tgt - target_items = tgt.items + if tgt.length > 0 then target_items = tgt.items curr_pos = pos end @@ -1279,7 +1302,7 @@ private class FlatBufferIterator init with_pos(tgt: FlatBuffer, pos: Int) do target = tgt - target_items = tgt.items + if tgt.length > 0 then target_items = tgt.items curr_pos = pos end @@ -1343,9 +1366,18 @@ redef class Bool end redef class Int + + # Wrapper of strerror C function + private fun strerror_ext: NativeString is extern `{ + return strerror(recv); + `} + + # Returns a string describing error number + fun strerror: String do return strerror_ext.to_s + # Fill `s` with the digits in base `base` of `self` (and with the '-' sign if 'signed' and negative). # assume < to_c max const of char - fun fill_buffer(s: Buffer, base: Int, signed: Bool) + private fun fill_buffer(s: Buffer, base: Int, signed: Bool) do var n: Int # Sign @@ -1379,7 +1411,10 @@ redef class Int return native_int_to_s(len).to_s_with_length(len) end - # return displayable int in hexadecimal (unsigned (not now)) + # return displayable int in hexadecimal + # + # assert 1.to_hex == "1" + # assert (-255).to_hex == "-ff" fun to_hex: String do return to_base(16,false) # return displayable int in base base and signed @@ -1394,6 +1429,11 @@ end redef class Float # Pretty print self, print needoed decimals up to a max of 3. + # + # assert 12.34.to_s == "12.34" + # assert (-0120.03450).to_s == "-120.035" + # + # see `to_precision` for a different precision. redef fun to_s do var str = to_precision( 3 ) if is_inf != 0 or is_nan then return str @@ -1413,6 +1453,11 @@ redef class Float end # `self` representation with `nb` digits after the '.'. + # + # assert 12.345.to_precision(1) == "12.3" + # assert 12.345.to_precision(2) == "12.35" + # assert 12.345.to_precision(3) == "12.345" + # assert 12.345.to_precision(4) == "12.3450" fun to_precision(nb: Int): String do if is_nan then return "nan" @@ -1445,6 +1490,12 @@ redef class Float end end + # `self` representation with `nb` digits after the '.'. + # + # assert 12.345.to_precision_native(1) == "12.3" + # assert 12.345.to_precision_native(2) == "12.35" + # assert 12.345.to_precision_native(3) == "12.345" + # assert 12.345.to_precision_native(4) == "12.3450" fun to_precision_native(nb: Int): String import NativeString.to_s `{ int size; char *str; @@ -1467,27 +1518,37 @@ redef class Char end # Returns true if the char is a numerical digit + # + # assert '0'.is_numeric + # assert '9'.is_numeric + # assert not 'a'.is_numeric + # assert not '?'.is_numeric fun is_numeric: Bool do - if self >= '0' and self <= '9' - then - return true - end - return false + return self >= '0' and self <= '9' end # Returns true if the char is an alpha digit + # + # assert 'a'.is_alpha + # assert 'Z'.is_alpha + # assert not '0'.is_alpha + # assert not '?'.is_alpha fun is_alpha: Bool do - if (self >= 'a' and self <= 'z') or (self >= 'A' and self <= 'Z') then return true - return false + return (self >= 'a' and self <= 'z') or (self >= 'A' and self <= 'Z') end # Returns true if the char is an alpha or a numeric digit + # + # assert 'a'.is_alphanumeric + # assert 'Z'.is_alphanumeric + # assert '0'.is_alphanumeric + # assert '9'.is_alphanumeric + # assert not '?'.is_alphanumeric fun is_alphanumeric: Bool do - if self.is_numeric or self.is_alpha then return true - return false + return self.is_numeric or self.is_alpha end end @@ -1504,7 +1565,7 @@ redef class Collection[E] # # assert [1, 2, 3].join(":") == "1:2:3" # assert [1..3].join(":") == "1:2:3" - fun join(sep: String): String + fun join(sep: Text): String do if is_empty then return "" @@ -1660,3 +1721,47 @@ redef class Sys private fun native_argv(i: Int): NativeString is intern end +# Comparator that efficienlty use `to_s` to compare things +# +# The comparaison call `to_s` on object and use the result to order things. +# +# var a = [1, 2, 3, 10, 20] +# (new CachedAlphaComparator).sort(a) +# assert a == [1, 10, 2, 20, 3] +# +# Internally the result of `to_s` is cached in a HashMap to counter +# uneficient implementation of `to_s`. +# +# Note: it caching is not usefull, see `alpha_comparator` +class CachedAlphaComparator + super Comparator[Object] + + private var cache = new HashMap[Object, String] + + private fun do_to_s(a: Object): String do + if cache.has_key(a) then return cache[a] + var res = a.to_s + cache[a] = res + return res + end + + redef fun compare(a, b) do + return do_to_s(a) <=> do_to_s(b) + end +end + +# see `alpha_comparator` +private class AlphaComparator + super Comparator[Object] + redef fun compare(a, b) do return a.to_s <=> b.to_s +end + +# Stateless comparator that naively use `to_s` to compare things. +# +# Note: the result of `to_s` is not cached, thus can be invoked a lot +# on a single instace. See `CachedAlphaComparator` as an alternative. +# +# var a = [1, 2, 3, 10, 20] +# alpha_comparator.sort(a) +# assert a == [1, 10, 2, 20, 3] +fun alpha_comparator: Comparator[Object] do return once new AlphaComparator