`{
#include <stdio.h>
+#include <string.h>
`}
###############################################################################
redef type OTHER: Text
- # Type of the view on self (.chars)
- type SELFVIEW: StringCharView
-
# Type of self (used for factorization of several methods, ex : substring_from, empty...)
type SELFTYPE: Text
- var hash_cache: nullable Int = null
-
# Gets a view on the chars of the Text object
- fun chars: SELFVIEW is abstract
+ #
+ # assert "hello".chars.to_a == ['h', 'e', 'l', 'l', 'o']
+ fun chars: SequenceRead[Char] is abstract
# Number of characters contained in self.
+ #
+ # assert "12345".length == 5
+ # assert "".length == 0
fun length: Int is abstract
# Create a substring.
# In this case, `from += count` and `count -= from`.
fun substring(from: Int, count: Int): SELFTYPE is abstract
+ # Iterates on the substrings of self if any
+ fun substrings: Iterator[Text] is abstract
+
# Concatenates `o` to `self`
+ #
+ # assert "hello" + "world" == "helloworld"
+ # assert "" + "hello" + "" == "hello"
fun +(o: Text): SELFTYPE is abstract
# Auto-concatenates self `i` times
+ #
+ # assert "abc" * 4 == "abcabcabcabc"
+ # assert "abc" * 1 == "abc"
+ # assert "abc" * 0 == ""
fun *(i: Int): SELFTYPE is abstract
# Is the current Text empty (== "")
- # assert "".is_empty
- # assert not "foo".is_empty
+ #
+ # assert "".is_empty
+ # assert not "foo".is_empty
fun is_empty: Bool do return self.length == 0
# Returns an empty Text of the right type
- fun empty: SELFTYPE is abstract
+ #
+ # This method is used internally to get the right
+ # implementation of an empty string.
+ protected fun empty: SELFTYPE is abstract
# Gets the first char of the Text
#
var iter = self.chars.iterator_from(pos)
while iter.is_ok do
if iter.item == c then return iter.index
+ iter.next
end
return -1
end
fun to_cstring: NativeString do return flatten.to_cstring
# The index of the last occurrence of an element starting from pos (in reverse order).
- # Example :
- # assert "/etc/bin/test/test.nit".last_index_of_from('/', length-1) == 13
- # assert "/etc/bin/test/test.nit".last_index_of_from('/', 12) == 8
+ #
+ # var s = "/etc/bin/test/test.nit"
+ # assert s.last_index_of_from('/', s.length-1) == 13
+ # assert s.last_index_of_from('/', 12) == 8
#
# Returns -1 if not found
#
end
# Returns a reversed version of self
+ #
+ # assert "hello".reversed == "olleh"
+ # assert "bob".reversed == "bob"
+ # assert "".reversed == ""
fun reversed: SELFTYPE is abstract
# Does self have a substring `str` starting from position `pos`?
fun has_substring(str: String, pos: Int): Bool
do
var myiter = self.chars.iterator_from(pos)
- var itsiter = str.iterator
+ var itsiter = str.chars.iterator
while myiter.is_ok and itsiter.is_ok do
if myiter.item != itsiter.item then return false
myiter.next
end
# If `self` contains only digits and alpha <= 'f', return the corresponding integer.
+ #
+ # assert "ff".to_hex == 255
fun to_hex: Int do return a_to(16)
# If `self` contains only digits and letters, return the corresponding integer in a given base
fun to_lower : SELFTYPE is abstract
# Removes the whitespaces at the beginning of self
+ #
+ # assert " \n\thello \n\t".l_trim == "hello \n\t"
+ #
+ # A whitespace is defined as any character which ascii value is less than or equal to 32
fun l_trim: SELFTYPE
do
var iter = self.chars.iterator
end
# Removes the whitespaces at the end of self
+ #
+ # assert " \n\thello \n\t".r_trim == " \n\thello"
+ #
+ # A whitespace is defined as any character which ascii value is less than or equal to 32
fun r_trim: SELFTYPE
do
var iter = self.chars.reverse_iterator
fun escape_more_to_c(chars: String): String
do
var b = new FlatBuffer
- for c in escape_to_c do
+ for c in escape_to_c.chars do
if chars.chars.has(c) then
b.add('\\')
end
return b.to_s
end
- # Escape to c plus braces
+ # Escape to C plus braces
#
# assert "\n\"'\\\{\}".escape_to_nit == "\\n\\\"\\'\\\\\\\{\\\}"
fun escape_to_nit: String do return escape_more_to_c("\{\}")
# Return a string where Nit escape sequences are transformed.
#
- # Example:
# var s = "\\n"
# assert s.length == 2
# var u = s.unescape_nit
# assert u.length == 1
- # assert u[0].ascii == 10 # (the ASCII value of the "new line" character)
+ # assert u.chars[0].ascii == 10 # (the ASCII value of the "new line" character)
fun unescape_nit: String
do
var res = new FlatBuffer.with_capacity(self.length)
var was_slash = false
- for c in self do
+ for c in chars do
if not was_slash then
if c == '\\' then
was_slash = true
return res.to_s
end
+ # Equality of text
+ # Two pieces of text are equals if thez have the same characters in the same order.
+ #
+ # assert "hello" == "hello"
+ # assert "hello" != "HELLO"
+ # assert "hello" == "hel"+"lo"
+ #
+ # Things that are not Text are not equal.
+ #
+ # assert "9" != '9'
+ # assert "9" != ['9']
+ # assert "9" != 9
+ #
+ # assert "9".chars.first == '9' # equality of Char
+ # assert "9".chars == ['9'] # equality of Sequence
+ # assert "9".to_i == 9 # equality of Int
redef fun ==(o)
do
if o == null then return false
return self.chars == o.chars
end
- redef fun <(o)
+ # Lexicographical comparaison
+ #
+ # assert "abc" < "xy"
+ # assert "ABC" < "abc"
+ redef fun <(other)
do
- return self.chars < o.chars
+ var self_chars = self.chars.iterator
+ var other_chars = other.chars.iterator
+
+ while self_chars.is_ok and other_chars.is_ok do
+ if self_chars.item < other_chars.item then return true
+ if self_chars.item > other_chars.item then return false
+ self_chars.next
+ other_chars.next
+ end
+
+ if self_chars.is_ok then
+ return false
+ else
+ return true
+ end
end
# Flat representation of self
fun flatten: FlatText is abstract
+ private var hash_cache: nullable Int = null
+
redef fun hash
do
if hash_cache == null then
# djb2 hash algorithm
var h = 5381
- var i = length - 1
for char in self.chars do
- h = (h * 32) + h + char.ascii
- i -= 1
+ h = h.lshift(5) + h + char.ascii
end
hash_cache = h
# Real items, used as cache for to_cstring is called
private var real_items: nullable NativeString = null
- redef var length: Int
+ redef var length: Int = 0
init do end
# Abstract class for the SequenceRead compatible
# views on String and Buffer objects
-abstract class StringCharView
+private abstract class StringCharView
super SequenceRead[Char]
- super Comparable
type SELFTYPE: Text
- redef type OTHER: StringCharView
-
private var target: SELFTYPE
private init(tgt: SELFTYPE)
redef fun iterator: IndexedIterator[Char] do return self.iterator_from(0)
- # Gets a new Iterator starting at position `pos`
- #
- # Ex :
- # var iter = "abcd".iterator_from(2)
- # while iter.is_ok do
- # printn iter.item
- # iter.next
- # end
- #
- # Outputs : cd
- fun iterator_from(pos: Int): IndexedIterator[Char] is abstract
-
- # Gets an iterator starting at the end and going backwards
- #
- # Ex :
- # var reviter = "now step live...".reverse_iterator
- # while reviter.is_ok do
- # printn reviter.item
- # reviter.next
- # end
- #
- # Outputs : ...evil pets won
- fun reverse_iterator: IndexedIterator[Char] do return self.reverse_iterator_from(self.length - 1)
-
- # Gets an iterator on the chars of self starting from `pos`
- #
- # Ex :
- # var iter = "abcd".reverse_iterator_from(1)
- # while iter.is_ok do
- # printn iter.item
- # iter.next
- # end
- #
- # Outputs : ba
- fun reverse_iterator_from(pos: Int): IndexedIterator[Char] is abstract
-
- redef fun has(c: Char): Bool
- do
- for i in self do
- if i == c then return true
- end
- return false
- end
-
- redef fun ==(other)
- do
- if other == null then return false
- if not other isa StringCharView then return false
- var other_chars = other.iterator
- for i in self do
- if i != other_chars.item then return false
- other_chars.next
- end
- return true
- end
-
- redef fun <(other)
- do
- var self_chars = self.iterator
- var other_chars = other.iterator
-
- while self_chars.is_ok and other_chars.is_ok do
- if self_chars.item < other_chars.item then return true
- if self_chars.item > other_chars.item then return false
- self_chars.next
- other_chars.next
- end
-
- if self_chars.is_ok then
- return false
- else
- return true
- end
- end
+ redef fun reverse_iterator do return self.reverse_iterator_from(self.length - 1)
end
# View on Buffer objects, extends Sequence
# for mutation operations
-abstract class BufferCharView
+private abstract class BufferCharView
super StringCharView
super Sequence[Char]
redef fun to_s do return self
+ fun append(s: String): SELFTYPE is abstract
+
+ fun prepend(s: String): SELFTYPE is abstract
+
+ fun insert_at(s: String, pos: Int): SELFTYPE is abstract
+end
+
+private class FlatSubstringsIter
+ super Iterator[FlatText]
+
+ var tgt: nullable FlatText
+
+ init(tgt: FlatText) do self.tgt = tgt
+
+ redef fun item do
+ assert is_ok
+ return tgt.as(not null)
+ end
+
+ redef fun is_ok do return tgt != null
+
+ redef fun next do tgt = null
end
# Immutable strings of characters.
super FlatText
super String
- redef type SELFTYPE: FlatString
- redef type SELFVIEW: FlatStringCharView
-
# Index in _items of the start of the string
private var index_from: Int
# Indes in _items of the last item of the string
private var index_to: Int
- redef var chars: SELFVIEW = new FlatStringCharView(self)
+ redef var chars: SequenceRead[Char] = new FlatStringCharView(self)
################################################
# AbstractString specific methods #
################################################
- redef fun [](index) do
- assert index >= 0
- # Check that the index (+ index_from) is not larger than indexTo
- # In other terms, if the index is valid
- assert (index + index_from) <= index_to
- return items[index + index_from]
- end
-
redef fun reversed
do
var native = calloc_string(self.length + 1)
- var reviter = chars.reverse_iterator
+ var length = self.length
+ var items = self.items
var pos = 0
- while reviter.is_ok do
- native[pos] = reviter.item
+ var ipos = length-1
+ while pos < length do
+ native[pos] = items[ipos]
pos += 1
- reviter.next
+ ipos -= 1
end
return native.to_s_with_length(self.length)
end
index_to = to
end
- # Return a null terminated char *
redef fun to_cstring: NativeString
do
if real_items != null then return real_items.as(not null)
return true
end
- # The comparison between two strings is done on a lexicographical basis
- #
- # assert ("aa" < "b") == true
redef fun <(other)
do
if not other isa FlatString then return super
return my_length < its_length
end
- # The concatenation of `self` with `s`
- #
- # assert "hello " + "world!" == "hello world!"
redef fun +(s)
do
var my_length = self.length
return target_string.to_s_with_length(total_length)
end
- # assert "abc"*3 == "abcabcabc"
- # assert "abc"*1 == "abc"
- # assert "abc"*0 == ""
redef fun *(i)
do
assert i >= 0
redef fun hash
do
if hash_cache == null then
- # djb2 hash algorythm
+ # djb2 hash algorithm
var h = 5381
- var i = length - 1
+ var i = index_from
var myitems = items
- var strStart = index_from
-
- i += strStart
- while i >= strStart do
- h = (h * 32) + h + self.items[i].ascii
- i -= 1
+ while i <= index_to do
+ h = h.lshift(5) + h + myitems[i].ascii
+ i += 1
end
hash_cache = h
return hash_cache.as(not null)
end
+
+ redef fun substrings do return new FlatSubstringsIter(self)
end
private class FlatStringReverseIterator
# Check that the index (+ index_from) is not larger than indexTo
# In other terms, if the index is valid
assert index >= 0
+ var target = self.target
assert (index + target.index_from) <= target.index_to
return target.items[index + target.index_from]
end
abstract class Buffer
super Text
- redef type SELFVIEW: BufferCharView
redef type SELFTYPE: Buffer
- var is_dirty = true
+ # Specific implementations MUST set this to `true` in order to invalidate caches
+ protected var is_dirty = true
# Modifies the char contained at pos `index`
#
fun add(c: Char) is abstract
# Clears the buffer
+ #
+ # var b = new FlatBuffer
+ # b.append "hello"
+ # assert not b.is_empty
+ # b.clear
+ # assert b.is_empty
fun clear is abstract
# Enlarges the subsequent array containing the chars of self
fun enlarge(cap: Int) is abstract
# Adds the content of text `s` at the end of self
+ #
+ # var b = new FlatBuffer
+ # b.append "hello"
+ # b.append "world"
+ # assert b == "helloworld"
fun append(s: Text) is abstract
redef fun hash
return super
end
+ # In Buffers, the internal sequence of character is mutable
+ # Thus, `chars` can be used to modify the buffer.
+ redef fun chars: Sequence[Char] is abstract
end
# Mutable strings of characters.
super FlatText
super Buffer
- redef type SELFVIEW: FlatBufferCharView
redef type SELFTYPE: FlatBuffer
- redef var chars: SELFVIEW = new FlatBufferCharView(self)
+ redef var chars: Sequence[Char] = new FlatBufferCharView(self)
- var capacity: Int
+ private var capacity: Int = 0
+
+ redef fun substrings do return new FlatSubstringsIter(self)
redef fun []=(index, item)
do
redef fun enlarge(cap)
do
- is_dirty = true
var c = capacity
if cap <= c then return
while c <= cap do c = c * 2 + 2
var a = calloc_string(c+1)
- items.copy_to(a, length, 0, 0)
+ if length > 0 then items.copy_to(a, length, 0, 0)
items = a
capacity = c
- items.copy_to(a, length, 0, 0)
end
redef fun to_s: String
if is_dirty then
var new_native = calloc_string(length + 1)
new_native[length] = '\0'
- items.copy_to(new_native, length, 0, 0)
+ if length > 0 then items.copy_to(new_native, length, 0, 0)
real_items = new_native
is_dirty = false
end
end
# Create a new empty string.
- init do with_capacity(5)
+ init do end
init from(s: Text)
do
redef fun append(s)
do
+ if s.is_empty then return
is_dirty = true
var sl = s.length
if capacity < length + sl then enlarge(length + sl)
end
return new_buf
end
+
+ redef fun to_upper
+ do
+ var new_buf = new FlatBuffer.with_capacity(self.length)
+ for i in self.chars do
+ new_buf.add(i.to_upper)
+ end
+ return new_buf
+ end
+
+ redef fun to_lower
+ do
+ var new_buf = new FlatBuffer.with_capacity(self.length)
+ for i in self.chars do
+ new_buf.add(i.to_lower)
+ end
+ return new_buf
+ end
end
private class FlatBufferReverseIterator
init with_pos(tgt: FlatBuffer, pos: Int)
do
target = tgt
- target_items = tgt.items
+ if tgt.length > 0 then target_items = tgt.items
curr_pos = pos
end
init with_pos(tgt: FlatBuffer, pos: Int)
do
target = tgt
- target_items = tgt.items
+ if tgt.length > 0 then target_items = tgt.items
curr_pos = pos
end
end
redef class Int
+
+ # Wrapper of strerror C function
+ private fun strerror_ext: NativeString is extern `{
+ return strerror(recv);
+ `}
+
+ # Returns a string describing error number
+ fun strerror: String do return strerror_ext.to_s
+
# Fill `s` with the digits in base `base` of `self` (and with the '-' sign if 'signed' and negative).
# assume < to_c max const of char
- fun fill_buffer(s: Buffer, base: Int, signed: Bool)
+ private fun fill_buffer(s: Buffer, base: Int, signed: Bool)
do
var n: Int
# Sign
return native_int_to_s(len).to_s_with_length(len)
end
- # return displayable int in hexadecimal (unsigned (not now))
+ # return displayable int in hexadecimal
+ #
+ # assert 1.to_hex == "1"
+ # assert (-255).to_hex == "-ff"
fun to_hex: String do return to_base(16,false)
# return displayable int in base base and signed
redef class Float
# Pretty print self, print needoed decimals up to a max of 3.
+ #
+ # assert 12.34.to_s == "12.34"
+ # assert (-0120.03450).to_s == "-120.035"
+ #
+ # see `to_precision` for a different precision.
redef fun to_s do
var str = to_precision( 3 )
if is_inf != 0 or is_nan then return str
end
# `self` representation with `nb` digits after the '.'.
+ #
+ # assert 12.345.to_precision(1) == "12.3"
+ # assert 12.345.to_precision(2) == "12.35"
+ # assert 12.345.to_precision(3) == "12.345"
+ # assert 12.345.to_precision(4) == "12.3450"
fun to_precision(nb: Int): String
do
if is_nan then return "nan"
end
end
+ # `self` representation with `nb` digits after the '.'.
+ #
+ # assert 12.345.to_precision_native(1) == "12.3"
+ # assert 12.345.to_precision_native(2) == "12.35"
+ # assert 12.345.to_precision_native(3) == "12.345"
+ # assert 12.345.to_precision_native(4) == "12.3450"
fun to_precision_native(nb: Int): String import NativeString.to_s `{
int size;
char *str;
end
# Returns true if the char is a numerical digit
+ #
+ # assert '0'.is_numeric
+ # assert '9'.is_numeric
+ # assert not 'a'.is_numeric
+ # assert not '?'.is_numeric
fun is_numeric: Bool
do
- if self >= '0' and self <= '9'
- then
- return true
- end
- return false
+ return self >= '0' and self <= '9'
end
# Returns true if the char is an alpha digit
+ #
+ # assert 'a'.is_alpha
+ # assert 'Z'.is_alpha
+ # assert not '0'.is_alpha
+ # assert not '?'.is_alpha
fun is_alpha: Bool
do
- if (self >= 'a' and self <= 'z') or (self >= 'A' and self <= 'Z') then return true
- return false
+ return (self >= 'a' and self <= 'z') or (self >= 'A' and self <= 'Z')
end
# Returns true if the char is an alpha or a numeric digit
+ #
+ # assert 'a'.is_alphanumeric
+ # assert 'Z'.is_alphanumeric
+ # assert '0'.is_alphanumeric
+ # assert '9'.is_alphanumeric
+ # assert not '?'.is_alphanumeric
fun is_alphanumeric: Bool
do
- if self.is_numeric or self.is_alpha then return true
- return false
+ return self.is_numeric or self.is_alpha
end
end
#
# assert [1, 2, 3].join(":") == "1:2:3"
# assert [1..3].join(":") == "1:2:3"
- fun join(sep: String): String
+ fun join(sep: Text): String
do
if is_empty then return ""
private fun native_argv(i: Int): NativeString is intern
end
+# Comparator that efficienlty use `to_s` to compare things
+#
+# The comparaison call `to_s` on object and use the result to order things.
+#
+# var a = [1, 2, 3, 10, 20]
+# (new CachedAlphaComparator).sort(a)
+# assert a == [1, 10, 2, 20, 3]
+#
+# Internally the result of `to_s` is cached in a HashMap to counter
+# uneficient implementation of `to_s`.
+#
+# Note: it caching is not usefull, see `alpha_comparator`
+class CachedAlphaComparator
+ super Comparator[Object]
+
+ private var cache = new HashMap[Object, String]
+
+ private fun do_to_s(a: Object): String do
+ if cache.has_key(a) then return cache[a]
+ var res = a.to_s
+ cache[a] = res
+ return res
+ end
+
+ redef fun compare(a, b) do
+ return do_to_s(a) <=> do_to_s(b)
+ end
+end
+
+# see `alpha_comparator`
+private class AlphaComparator
+ super Comparator[Object]
+ redef fun compare(a, b) do return a.to_s <=> b.to_s
+end
+
+# Stateless comparator that naively use `to_s` to compare things.
+#
+# Note: the result of `to_s` is not cached, thus can be invoked a lot
+# on a single instace. See `CachedAlphaComparator` as an alternative.
+#
+# var a = [1, 2, 3, 10, 20]
+# alpha_comparator.sort(a)
+# assert a == [1, 10, 2, 20, 3]
+fun alpha_comparator: Comparator[Object] do return once new AlphaComparator