#
# This file is free software, which comes along with NIT. This software is
# distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
-# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. You can modify it is you want, provided this header
# is kept unaltered, and a notification of the changes is added.
# You are allowed to redistribute it and sell it, alone or is a part of
# Basic manipulations of strings of characters
module string
+import math
intrude import collection # FIXME should be collection::array
`{
#include <stdio.h>
+#include <string.h>
`}
###############################################################################
# String #
###############################################################################
-# Common subclass for String and Buffer
-abstract class AbstractString
- super AbstractArrayRead[Char]
+# High-level abstraction for all text representations
+abstract class Text
+ super Comparable
+ super StringCapable
- readable private var _items: NativeString
+ redef type OTHER: Text
- fun chars: StringCharView is abstract
+ # Type of self (used for factorization of several methods, ex : substring_from, empty...)
+ type SELFTYPE: Text
- # Access a character at `index` in the string.
+ # Gets a view on the chars of the Text object
#
- # assert "abcd"[2] == 'c'
- redef fun [](index) do return _items[index]
+ # assert "hello".chars.to_a == ['h', 'e', 'l', 'l', 'o']
+ fun chars: SequenceRead[Char] is abstract
+
+ # Number of characters contained in self.
+ #
+ # assert "12345".length == 5
+ # assert "".length == 0
+ fun length: Int is abstract
# Create a substring.
#
# A `from` index < 0 will be replaced by 0.
# Unless a `count` value is > 0 at the same time.
# In this case, `from += count` and `count -= from`.
- fun substring(from: Int, count: Int): String
+ fun substring(from: Int, count: Int): SELFTYPE is abstract
+
+ # Concatenates `o` to `self`
+ #
+ # assert "hello" + "world" == "helloworld"
+ # assert "" + "hello" + "" == "hello"
+ fun +(o: Text): SELFTYPE is abstract
+
+ # Auto-concatenates self `i` times
+ #
+ # assert "abc" * 4 == "abcabcabcabc"
+ # assert "abc" * 1 == "abc"
+ # assert "abc" * 0 == ""
+ fun *(i: Int): SELFTYPE is abstract
+
+ # Is the current Text empty (== "")
+ #
+ # assert "".is_empty
+ # assert not "foo".is_empty
+ fun is_empty: Bool do return self.length == 0
+
+ # Returns an empty Text of the right type
+ #
+ # This method is used internally to get the right
+ # implementation of an empty string.
+ protected fun empty: SELFTYPE is abstract
+
+ # Gets the first char of the Text
+ #
+ # DEPRECATED : Use self.chars.first instead
+ fun first: Char do return self.chars[0]
+
+ # Access a character at `index` in the string.
+ #
+ # assert "abcd"[2] == 'c'
+ #
+ # DEPRECATED : Use self.chars.[] instead
+ fun [](index: Int): Char do return self.chars[index]
+
+ # Gets the index of the first occurrence of 'c'
+ #
+ # Returns -1 if not found
+ #
+ # DEPRECATED : Use self.chars.index_of instead
+ fun index_of(c: Char): Int
do
- assert count >= 0
- count += from
- if from < 0 then from = 0
- if count > length then count = length
- if from < count then
- var r = new Buffer.with_capacity(count - from)
- while from < count do
- r.chars.push(_items[from])
- from += 1
- end
- return r.to_s
- else
- return ""
+ return index_of_from(c, 0)
+ end
+
+ # Gets the last char of self
+ #
+ # DEPRECATED : Use self.chars.last instead
+ fun last: Char do return self.chars[length-1]
+
+ # Gets the index of the first occurrence of `c` starting from `pos`
+ #
+ # The chars of self are scanned forward from `pos`.
+ #
+ # Returns -1 if not found
+ #
+ # DEPRECATED : Use self.chars.index_of_from instead
+ fun index_of_from(c: Char, pos: Int): Int
+ do
+ var iter = self.chars.iterator_from(pos)
+ while iter.is_ok do
+ if iter.item == c then return iter.index
+ # Move to the next char; without this step the loop never ends
+ # as soon as the char at `pos` differs from `c`.
+ iter.next
+ end
+ return -1
+ end
+
+ # Index of the last occurrence of `c` in self
+ #
+ # The search is delegated to `last_index_of_from`, starting at the last position of self.
+ #
+ # Returns -1 if not found
+ #
+ # DEPRECATED : Use self.chars.last_index_of instead
+ fun last_index_of(c: Char): Int
+ do
+ var last_pos = self.length - 1
+ return self.last_index_of_from(c, last_pos)
+ end
+
+ # Return a null terminated char *
+ fun to_cstring: NativeString do return flatten.to_cstring
+
+ # The index of the last occurrence of an element starting from pos (in reverse order).
+ #
+ # var s = "/etc/bin/test/test.nit"
+ # assert s.last_index_of_from('/', s.length-1) == 13
+ # assert s.last_index_of_from('/', 12) == 8
+ #
+ # Returns -1 if not found
+ #
+ # DEPRECATED : Use self.chars.last_index_of_from instead
+ fun last_index_of_from(item: Char, pos: Int): Int
+ do
+ var iter = self.chars.reverse_iterator_from(pos)
+ while iter.is_ok do
+ if iter.item == item then return iter.index
+ iter.next
end
+ return -1
+ end
+
+ # Gets an iterator on the chars of self
+ #
+ # DEPRECATED : Use self.chars.iterator instead
+ fun iterator: Iterator[Char]
+ do
+ return self.chars.iterator
end
+ # Does self contain the char `c`?
+ #
+ # The lookup is forwarded to the `chars` view of self.
+ #
+ # DEPRECATED : Use self.chars.has instead
+ fun has(c: Char): Bool do return chars.has(c)
+
+ # Gets an Array containing the chars of self
+ #
+ # DEPRECATED : Use self.chars.to_a instead
+ fun to_a: Array[Char] do return chars.to_a
+
# Create a substring from `self` beginning at the `from` position
#
# assert "abcd".substring_from(1) == "bcd"
# assert "abcd".substring_from(2) == "cd"
#
# As with substring, a `from` index < 0 will be replaced by 0
- fun substring_from(from: Int): String
+ fun substring_from(from: Int): SELFTYPE
do
- assert from < length
+ if from > self.length then return empty
+ if from < 0 then from = 0
return substring(from, length - from)
end
+ # Returns a reversed version of self
+ #
+ # assert "hello".reversed == "olleh"
+ # assert "bob".reversed == "bob"
+ # assert "".reversed == ""
+ fun reversed: SELFTYPE is abstract
+
# Does self have a substring `str` starting from position `pos`?
#
# assert "abcd".has_substring("bc",1) == true
# assert "abcd".has_substring("bc",2) == false
fun has_substring(str: String, pos: Int): Bool
do
- var itsindex = str.length - 1
- var myindex = pos + itsindex
- var myitems = _items
- var itsitems = str._items
- if myindex > length or itsindex > myindex then return false
- var its_index_from = str._index_from
- itsindex += its_index_from
- while itsindex >= its_index_from do
- if myitems[myindex] != itsitems[itsindex] then return false
- myindex -= 1
- itsindex -= 1
+ var myiter = self.chars.iterator_from(pos)
+ var itsiter = str.chars.iterator
+ while myiter.is_ok and itsiter.is_ok do
+ if myiter.item != itsiter.item then return false
+ myiter.next
+ itsiter.next
end
+ if itsiter.is_ok then return false
return true
end
end
# If `self` contains only digits and alpha <= 'f', return the corresponding integer.
+ #
+ # assert "ff".to_hex == 255
fun to_hex: Int do return a_to(16)
# If `self` contains only digits and letters, return the corresponding integer in a given base
# A upper case version of `self`
#
# assert "Hello World!".to_upper == "HELLO WORLD!"
- fun to_upper: String
- do
- var s = new Buffer.with_capacity(length)
- for i in self.chars do s.add(i.to_upper)
- return s.to_s
- end
+ fun to_upper: SELFTYPE is abstract
# A lower case version of `self`
#
# assert "Hello World!".to_lower == "hello world!"
- fun to_lower : String
- do
- var s = new Buffer.with_capacity(length)
- for i in self.chars do s.add(i.to_lower)
- return s.to_s
- end
+ fun to_lower : SELFTYPE is abstract
- # Trims trailing and preceding white spaces
- # A whitespace is defined as any character which ascii value is less than or equal to 32
+ # Removes the whitespaces at the beginning of self
#
- # assert " Hello World ! ".trim == "Hello World !"
- # assert "\na\nb\tc\t".trim == "a\nb\tc"
- fun trim: String
- do
- if self.length == 0 then return self.to_s
- # find position of the first non white space char (ascii < 32) from the start of the string
- var start_pos = 0
- while self.chars[start_pos].ascii <= 32 do
- start_pos += 1
- if start_pos == length then return ""
- end
- # find position of the first non white space char from the end of the string
- var end_pos = length - 1
- while self.chars[end_pos].ascii <= 32 do
- end_pos -= 1
- if end_pos == start_pos then return self.chars[start_pos].to_s
+ # assert " \n\thello \n\t".l_trim == "hello \n\t"
+ #
+ # A whitespace is defined as any character which ascii value is less than or equal to 32
+ fun l_trim: SELFTYPE
+ do
+ var iter = self.chars.iterator
+ while iter.is_ok do
+ if iter.item.ascii > 32 then break
+ iter.next
end
- return self.substring(start_pos, end_pos - start_pos + 1)
+ if iter.index == length then return self.empty
+ return self.substring_from(iter.index)
end
- redef fun output
+ # Removes the whitespaces at the end of self
+ #
+ # assert " \n\thello \n\t".r_trim == " \n\thello"
+ #
+ # A whitespace is defined as any character which ascii value is less than or equal to 32
+ #
+ # When self only holds whitespaces (or is empty), an empty text is returned.
+ fun r_trim: SELFTYPE
do
- var i = 0
- while i < length do
- _items[i].output
- i += 1
+ var iter = self.chars.reverse_iterator
+ while iter.is_ok do
+ if iter.item.ascii > 32 then break
+ iter.next
end
+ # The loop ends without `break` only when every char is a whitespace.
+ # A reverse iterator runs out below the first index, never at `length`,
+ # so exhaustion is tested on the iterator itself.
+ if not iter.is_ok then return self.empty
+ return self.substring(0, iter.index + 1)
end
+ # Removes the whitespaces found at both ends of self
+ #
+ # The leading ones are dropped first (`l_trim`), then the trailing ones (`r_trim`).
+ # A whitespace is defined as any character which ascii value is less than or equal to 32
+ #
+ # assert " Hello World ! ".trim == "Hello World !"
+ # assert "\na\nb\tc\t".trim == "a\nb\tc"
+ fun trim: SELFTYPE
+ do
+ var without_leading = self.l_trim
+ return without_leading.r_trim
+ end
+
# Mangle a string to be a unique string only made of alphanumeric characters
fun to_cmangle: String
do
- var res = new Buffer
+ var res = new FlatBuffer
var underscore = false
for c in self.chars do
if (c >= 'a' and c <= 'z') or (c >='A' and c <= 'Z') then
# assert "\n\"'\\".escape_to_c == "\\n\\\"\\'\\\\"
fun escape_to_c: String
do
- var b = new Buffer
+ var b = new FlatBuffer
for c in self.chars do
if c == '\n' then
b.append("\\n")
# assert "ab|\{\}".escape_more_to_c("|\{\}") == "ab\\|\\\{\\\}"
fun escape_more_to_c(chars: String): String
do
- var b = new Buffer
- for c in escape_to_c do
- if chars.has(c) then
+ var b = new FlatBuffer
+ for c in escape_to_c.chars do
+ if chars.chars.has(c) then
b.add('\\')
end
b.add(c)
return b.to_s
end
- # Escape to c plus braces
+ # Escape to C plus braces
#
# assert "\n\"'\\\{\}".escape_to_nit == "\\n\\\"\\'\\\\\\\{\\\}"
fun escape_to_nit: String do return escape_more_to_c("\{\}")
# Return a string where Nit escape sequences are transformed.
#
- # Example:
# var s = "\\n"
# assert s.length == 2
# var u = s.unescape_nit
# assert u.length == 1
- # assert u[0].ascii == 10 # (the ASCII value of the "new line" character)
+ # assert u.chars[0].ascii == 10 # (the ASCII value of the "new line" character)
fun unescape_nit: String
do
- var res = new Buffer.with_capacity(self.length)
+ var res = new FlatBuffer.with_capacity(self.length)
var was_slash = false
- for c in self do
+ for c in chars do
if not was_slash then
if c == '\\' then
was_slash = true
end
return res.to_s
end
+
+ # Equality of text
+ # Two pieces of text are equal if they have the same characters in the same order.
+ #
+ # assert "hello" == "hello"
+ # assert "hello" != "HELLO"
+ # assert "hello" == "hel"+"lo"
+ #
+ # Things that are not Text are not equal.
+ #
+ # assert "9" != '9'
+ # assert "9" != ['9']
+ # assert "9" != 9
+ #
+ # assert "9".chars.first == '9' # equality of Char
+ # assert "9".chars == ['9'] # equality of Sequence
+ # assert "9".to_i == 9 # equality of Int
+ redef fun ==(o)
+ do
+ # Anything that is not a Text (null included) cannot be equal
+ if o == null then return false
+ if not o isa Text then return false
+ # Same object: no need to look at the chars
+ if is_same_instance(o) then return true
+ # The lengths are compared first; the chars only when the lengths match
+ return length == o.length and chars == o.chars
+ end
+
+ # Lexicographical comparison
+ #
+ # assert "abc" < "xy"
+ # assert "ABC" < "abc"
+ #
+ # The comparison is strict: a text is never lower than a text
+ # made of the same chars in the same order.
+ redef fun <(other)
+ do
+ var self_chars = self.chars.iterator
+ var other_chars = other.chars.iterator
+
+ while self_chars.is_ok and other_chars.is_ok do
+ if self_chars.item < other_chars.item then return true
+ if self_chars.item > other_chars.item then return false
+ self_chars.next
+ other_chars.next
+ end
+
+ # Every compared char was equal, so at least one text is exhausted.
+ # `self` is strictly lower only when `other` still has chars left;
+ # when both are exhausted the texts are equal and the answer is false.
+ return other_chars.is_ok
+ end
+
+ # Flat representation of self
+ fun flatten: FlatText is abstract
+
+ private var hash_cache: nullable Int = null
+
+ # Hash of the text (djb2), computed once and then served from `hash_cache`.
+ #
+ # The chars are folded from the last one to the first one, the same
+ # order as the redefinition in `FlatString`, so that two texts that are
+ # equal according to `==` get the same hash whatever their concrete class.
+ redef fun hash
+ do
+ if hash_cache == null then
+ # djb2 hash algorithm
+ var h = 5381
+
+ # Walk the chars backwards, from the last one down to the first one
+ var iter = self.chars.reverse_iterator
+ while iter.is_ok do
+ h = (h * 32) + h + iter.item.ascii
+ iter.next
+ end
+
+ hash_cache = h
+ end
+ return hash_cache.as(not null)
+ end
+
+end
+
+# All kinds of array-based text representations.
+#
+# The chars are stored in the native array `items`; `length` is kept as
+# an attribute and `flatten` returns the receiver itself.
+abstract class FlatText
+ super Text
+
+ # Native array holding the chars of the text
+ private var items: NativeString
+
+ # Real items, used as a cache when `to_cstring` is called
+ private var real_items: nullable NativeString = null
+
+ # Number of chars of the text
+ redef var length: Int
+
+ # Empty initializer: sets no attribute itself
+ init do end
+
+ # Outputs the chars `items[0]` to `items[length - 1]`, one at a time
+ redef fun output
+ do
+ var i = 0
+ while i < length do
+ items[i].output
+ i += 1
+ end
+ end
+
+ # A FlatText is already flat
+ redef fun flatten do return self
end
# Abstract class for the SequenceRead compatible
# views on String and Buffer objects
-abstract class StringCharView
+private abstract class StringCharView
super SequenceRead[Char]
- type SELFTYPE: AbstractString
+ type SELFTYPE: Text
private var target: SELFTYPE
redef fun length do return target.length
- redef fun has(c: Char): Bool
- do
- for i in self do
- if i == c then return true
- end
- return false
- end
+ redef fun iterator: IndexedIterator[Char] do return self.iterator_from(0)
+ redef fun reverse_iterator do return self.reverse_iterator_from(self.length - 1)
end
# View on Buffer objects, extends Sequence
# for mutation operations
-abstract class BufferCharView
+private abstract class BufferCharView
super StringCharView
super Sequence[Char]
end
+# Abstract ancestor of the string representations (see `FlatString` for the array-based one).
+#
+# `SELFTYPE` is narrowed to `String`, and `to_s` returns the receiver itself.
+abstract class String
+ super Text
+
+ redef type SELFTYPE: String
+
+ # A String is its own string representation
+ redef fun to_s do return self
+
+end
+
# Immutable strings of characters.
-class String
- super Comparable
- super AbstractString
- super StringCapable
+class FlatString
+ super FlatText
+ super String
- redef type OTHER: String
+ redef type SELFTYPE: FlatString
# Index in _items of the start of the string
- readable var _index_from: Int
+ private var index_from: Int
# Index in _items of the last item of the string
- readable var _index_to: Int
+ private var index_to: Int
- redef var chars: StringCharView = new FlatStringCharView(self)
+ redef var chars: SequenceRead[Char] = new FlatStringCharView(self)
################################################
# AbstractString specific methods #
################################################
- redef fun [](index) do
- assert index >= 0
- # Check that the index (+ index_from) is not larger than indexTo
- # In other terms, if the index is valid
- assert (index + _index_from) <= _index_to
- return _items[index + _index_from]
+ # Returns a new string holding the chars of self in reverse order.
+ #
+ # A FlatString may be a window `[index_from, index_to]` on a native
+ # array shared with other strings (see `substring`), so the chars are
+ # read relative to `index_from`, not from the start of the array.
+ redef fun reversed
+ do
+ var native = calloc_string(self.length + 1)
+ var length = self.length
+ var items = self.items
+ var pos = 0
+ # Position, in the native array, of the last char of self
+ var ipos = index_from + length - 1
+ while pos < length do
+ native[pos] = items[ipos]
+ pos += 1
+ ipos -= 1
+ end
+ return native.to_s_with_length(self.length)
end
- redef fun substring(from: Int, count: Int): String
+ redef fun substring(from, count)
do
assert count >= 0
from = 0
end
- var realFrom = _index_from + from
+ var realFrom = index_from + from
- if (realFrom + count) > _index_to then return new String.with_infos(_items, _index_to - realFrom + 1, realFrom, _index_to)
+ if (realFrom + count) > index_to then return new FlatString.with_infos(items, index_to - realFrom + 1, realFrom, index_to)
- if count == 0 then return ""
+ if count == 0 then return empty
var to = realFrom + count - 1
- return new String.with_infos(_items, to - realFrom + 1, realFrom, to)
+ return new FlatString.with_infos(items, to - realFrom + 1, realFrom, to)
end
- redef fun substring_from(from: Int): String
- do
- if from > _length then return ""
- if from < 0 then from = 0
- return substring(from, _length)
- end
-
- redef fun has_substring(str: String, pos: Int): Bool
- do
- var itsindex = str._length - 1
-
- var myindex = pos + itsindex
- var myitems = _items
-
- var itsitems = str._items
-
- if myindex > _length or itsindex > myindex then return false
+ redef fun empty do return "".as(FlatString)
- var itsindexfrom = str.index_from
- itsindex += itsindexfrom
- myindex += index_from
-
- while itsindex >= itsindexfrom do
- if myitems[myindex] != itsitems[itsindex] then return false
- myindex -= 1
- itsindex -= 1
- end
-
- return true
- end
-
- redef fun to_upper: String
+ redef fun to_upper
do
- var outstr = calloc_string(self._length + 1)
+ var outstr = calloc_string(self.length + 1)
var out_index = 0
- var myitems = self._items
- var index_from = self._index_from
- var max = self._index_to
+ var myitems = self.items
+ var index_from = self.index_from
+ var max = self.index_to
while index_from <= max do
outstr[out_index] = myitems[index_from].to_upper
outstr[self.length] = '\0'
- return outstr.to_s_with_length(self._length)
+ return outstr.to_s_with_length(self.length)
end
- redef fun to_lower : String
+ redef fun to_lower
do
- var outstr = calloc_string(self._length + 1)
+ var outstr = calloc_string(self.length + 1)
var out_index = 0
- var myitems = self._items
- var index_from = self._index_from
- var max = self._index_to
+ var myitems = self.items
+ var index_from = self.index_from
+ var max = self.index_to
while index_from <= max do
outstr[out_index] = myitems[index_from].to_lower
outstr[self.length] = '\0'
- return outstr.to_s_with_length(self._length)
- end
-
- redef fun trim: String
- do
- if self._length == 0 then return self
- # find position of the first non white space char (ascii < 32) from the start of the string
- var start_pos = self._index_from
- while _items[start_pos].ascii <= 32 do
- start_pos += 1
- if start_pos == _index_to + 1 then return ""
- end
- # find position of the first non white space char from the end of the string
- var end_pos = _index_to
- while _items[end_pos].ascii <= 32 do
- end_pos -= 1
- if end_pos == start_pos then return _items[start_pos].to_s
- end
- start_pos -= index_from
- end_pos -= index_from
- return self.substring(start_pos, end_pos - start_pos + 1)
+ return outstr.to_s_with_length(self.length)
end
redef fun output
do
- var i = self._index_from
- var imax = self._index_to
+ var i = self.index_from
+ var imax = self.index_to
while i <= imax do
- _items[i].output
+ items[i].output
i += 1
end
end
private init with_infos(items: NativeString, len: Int, from: Int, to: Int)
do
- self._items = items
- _length = len
- _index_from = from
- _index_to = to
+ self.items = items
+ length = len
+ index_from = from
+ index_to = to
end
- # Return a null terminated char *
- fun to_cstring: NativeString
+ redef fun to_cstring: NativeString
do
- if _index_from > 0 or _index_to != items.cstring_length - 1 then
- var newItems = calloc_string(_length + 1)
- self.items.copy_to(newItems, _length, _index_from, 0)
+ if real_items != null then return real_items.as(not null)
+ if index_from > 0 or index_to != items.cstring_length - 1 then
+ var newItems = calloc_string(length + 1)
+ self.items.copy_to(newItems, length, index_from, 0)
newItems[length] = '\0'
+ self.real_items = newItems
return newItems
end
- return _items
+ return items
end
redef fun ==(other)
do
- if not other isa String then return false
+ if not other isa FlatString then return super
if self.object_id == other.object_id then return true
- var my_length = _length
+ var my_length = length
- if other._length != my_length then return false
+ if other.length != my_length then return false
- var my_index = _index_from
- var its_index = other._index_from
+ var my_index = index_from
+ var its_index = other.index_from
var last_iteration = my_index + my_length
- var itsitems = other._items
- var myitems = self._items
+ var itsitems = other.items
+ var myitems = self.items
while my_index < last_iteration do
if myitems[my_index] != itsitems[its_index] then return false
return true
end
- # The comparison between two strings is done on a lexicographical basis
- #
- # assert ("aa" < "b") == true
redef fun <(other)
do
+ if not other isa FlatString then return super
+
if self.object_id == other.object_id then return false
var my_curr_char : Char
var its_curr_char : Char
- var curr_id_self = self._index_from
- var curr_id_other = other._index_from
+ var curr_id_self = self.index_from
+ var curr_id_other = other.index_from
- var my_items = self._items
- var its_items = other._items
+ var my_items = self.items
+ var its_items = other.items
- var my_length = self._length
- var its_length = other._length
+ var my_length = self.length
+ var its_length = other.length
var max_iterations = curr_id_self + my_length
return my_length < its_length
end
- # The concatenation of `self` with `s`
- #
- # assert "hello " + "world!" == "hello world!"
- fun +(s: String): String
+ redef fun +(s)
do
- var my_length = self._length
- var its_length = s._length
+ var my_length = self.length
+ var its_length = s.length
var total_length = my_length + its_length
var target_string = calloc_string(my_length + its_length + 1)
- self._items.copy_to(target_string, my_length, _index_from, 0)
- s._items.copy_to(target_string, its_length, s._index_from, my_length)
+ self.items.copy_to(target_string, my_length, index_from, 0)
+ if s isa FlatString then
+ s.items.copy_to(target_string, its_length, s.index_from, my_length)
+ else if s isa FlatBuffer then
+ s.items.copy_to(target_string, its_length, 0, my_length)
+ else
+ var curr_pos = my_length
+ for i in s.chars do
+ target_string[curr_pos] = i
+ curr_pos += 1
+ end
+ end
target_string[total_length] = '\0'
return target_string.to_s_with_length(total_length)
end
- # `i` repetitions of `self`
- #
- # assert "abc"*3 == "abcabcabc"
- # assert "abc"*1 == "abc"
- # assert "abc"*0 == ""
- fun *(i: Int): String
+ redef fun *(i)
do
assert i >= 0
- var my_length = self._length
+ var my_length = self.length
var final_length = my_length * i
- var my_items = self._items
+ var my_items = self.items
var target_string = calloc_string((final_length) + 1)
return target_string.to_s_with_length(final_length)
end
- redef fun to_s do return self
-
redef fun hash
do
- # djb2 hash algorythm
- var h = 5381
- var i = _length - 1
+ if hash_cache == null then
+ # djb2 hash algorythm
+ var h = 5381
+ var i = length - 1
+
+ var myitems = items
+ var strStart = index_from
- var myitems = _items
- var strStart = _index_from
+ i += strStart
- i += strStart
+ while i >= strStart do
+ h = (h * 32) + h + self.items[i].ascii
+ i -= 1
+ end
- while i >= strStart do
- h = (h * 32) + h + self._items[i].ascii
- i -= 1
+ hash_cache = h
end
- return h
+ return hash_cache.as(not null)
+ end
+end
+
+# Iterator walking the chars of a FlatString from a given position down to its first char.
+#
+# `curr_pos` is a position in the native array of the target, while
+# `index` is expressed relative to the string (it subtracts `index_from`).
+private class FlatStringReverseIterator
+ super IndexedIterator[Char]
+
+ # The string being iterated
+ var target: FlatString
+
+ # Native array of `target`, read directly by `item`
+ var target_items: NativeString
+
+ # Current position in `target_items`
+ var curr_pos: Int
+
+ # Starts the iteration at position `pos` of `tgt` (relative to the string)
+ init with_pos(tgt: FlatString, pos: Int)
+ do
+ target = tgt
+ target_items = tgt.items
+ curr_pos = pos + tgt.index_from
end
+
+ # The iteration stops below the first char of the string, which sits at
+ # `index_from` in the native array (not necessarily at 0 for a substring).
+ redef fun is_ok do return curr_pos >= target.index_from
+
+ redef fun item do return target_items[curr_pos]
+
+ redef fun next do curr_pos -= 1
+
+ redef fun index do return curr_pos - target.index_from
+
end
private class FlatStringIterator
super IndexedIterator[Char]
- var target: String
+ var target: FlatString
var target_items: NativeString
var curr_pos: Int
- init with_pos(tgt: String, pos: Int)
+ init with_pos(tgt: FlatString, pos: Int)
do
target = tgt
target_items = tgt.items
private class FlatStringCharView
super StringCharView
- redef type SELFTYPE: String
+ redef type SELFTYPE: FlatString
redef fun [](index)
do
# Check that the index (+ index_from) is not larger than indexTo
# In other terms, if the index is valid
assert index >= 0
- assert (index + target._index_from) <= target._index_to
- return target._items[index + target._index_from]
+ var target = self.target
+ assert (index + target.index_from) <= target.index_to
+ return target.items[index + target.index_from]
end
- redef fun iterator: IndexedIterator[Char] do return new FlatStringIterator.with_pos(target, 0)
+ redef fun iterator_from(start) do return new FlatStringIterator.with_pos(target, start)
+
+ redef fun reverse_iterator_from(start) do return new FlatStringReverseIterator.with_pos(target, start)
+
+end
+
+# Abstract text whose internal sequence of chars can be modified.
+#
+# Concrete subclasses provide the mutation services declared here
+# (`[]=`, `add`, `clear`, `enlarge`, `append`) and a mutable `chars` view.
+abstract class Buffer
+ super Text
+
+ redef type SELFTYPE: Buffer
+
+ # Specific implementations MUST set this to `true` in order to invalidate caches
+ protected var is_dirty = true
+
+ # Modifies the char contained at pos `index`
+ #
+ # DEPRECATED : Use self.chars.[]= instead
+ fun []=(index: Int, item: Char) is abstract
+
+ # Adds a char `c` at the end of self
+ #
+ # DEPRECATED : Use self.chars.add instead
+ fun add(c: Char) is abstract
+
+ # Clears the buffer
+ #
+ # var b = new FlatBuffer
+ # b.append "hello"
+ # assert not b.is_empty
+ # b.clear
+ # assert b.is_empty
+ fun clear is abstract
+
+ # Enlarges the subsequent array containing the chars of self
+ fun enlarge(cap: Int) is abstract
+ # Adds the content of text `s` at the end of self
+ #
+ # var b = new FlatBuffer
+ # b.append "hello"
+ # b.append "world"
+ # assert b == "helloworld"
+ fun append(s: Text) is abstract
+
+ # Hash of the current content.
+ #
+ # The cached value is dropped whenever `is_dirty` is set, then the
+ # inherited computation is used.
+ # NOTE(review): `is_dirty` is not reset here, so the hash is recomputed
+ # on each call for as long as the flag stays set.
+ redef fun hash
+ do
+ if is_dirty then hash_cache = null
+ return super
+ end
+
+ # In Buffers, the internal sequence of character is mutable
+ # Thus, `chars` can be used to modify the buffer.
+ redef fun chars: Sequence[Char] is abstract
end
# Mutable strings of characters.
-class Buffer
- super AbstractString
- super Comparable
- super StringCapable
- super AbstractArray[Char]
+class FlatBuffer
+ super FlatText
+ super Buffer
- redef type OTHER: String
+ redef type SELFTYPE: FlatBuffer
- redef var chars: BufferCharView = new FlatBufferCharView(self)
+ redef var chars: Sequence[Char] = new FlatBufferCharView(self)
+
+ private var capacity: Int
redef fun []=(index, item)
do
+ is_dirty = true
if index == length then
add(item)
return
end
assert index >= 0 and index < length
- _items[index] = item
+ items[index] = item
end
redef fun add(c)
do
- if _capacity <= length then enlarge(length + 5)
- _items[length] = c
- _length += 1
+ is_dirty = true
+ if capacity <= length then enlarge(length + 5)
+ items[length] = c
+ length += 1
+ end
+
+ redef fun clear do
+ is_dirty = true
+ length = 0
end
+ redef fun empty do return new FlatBuffer
+
+ # Grows the native array of self so that more than `cap` chars fit in it.
+ #
+ # Nothing is reallocated when the current capacity is already at least `cap`.
+ # Otherwise the capacity is grown by repeated `c * 2 + 2` steps and the
+ # current content is copied once into the newly allocated array.
redef fun enlarge(cap)
do
- var c = _capacity
+ is_dirty = true
+ var c = capacity
if cap <= c then return
while c <= cap do c = c * 2 + 2
var a = calloc_string(c+1)
- _items.copy_to(a, length, 0, 0)
- _items = a
- _capacity = c
+ # Copy the content before swapping the arrays; copying again after the
+ # swap would only copy the new array onto itself.
+ items.copy_to(a, length, 0, 0)
+ items = a
+ capacity = c
end
- redef fun append(s)
+ redef fun to_s: String
+ do
+ return to_cstring.to_s_with_length(length)
+ end
+
+ # Null-terminated native copy of the current content.
+ #
+ # The copy is kept in `real_items` and served again until the buffer is
+ # flagged dirty; a dirty buffer gets a fresh copy and the flag is cleared.
+ redef fun to_cstring
+ do
+ # Clean buffer: the cached copy is still valid
+ if not is_dirty then return real_items.as(not null)
+ var fresh = calloc_string(length + 1)
+ items.copy_to(fresh, length, 0, 0)
+ fresh[length] = '\0'
+ real_items = fresh
+ is_dirty = false
+ return fresh
+ end
+
+ # Create a new empty string.
+ init do with_capacity(5)
+
+ init from(s: Text)
do
- if s isa String then
- var sl = s.length
- if _capacity < _length + sl then enlarge(_length + sl)
- s.items.copy_to(_items, sl, s._index_from, _length)
- _length += sl
+ capacity = s.length + 1
+ length = s.length
+ items = calloc_string(capacity)
+ if s isa FlatString then
+ s.items.copy_to(items, length, s.index_from, 0)
+ else if s isa FlatBuffer then
+ s.items.copy_to(items, length, 0, 0)
else
- super
+ var curr_pos = 0
+ for i in s.chars do
+ items[curr_pos] = i
+ curr_pos += 1
+ end
end
end
- redef fun to_s: String
+ # Create a new empty string with a given capacity.
+ init with_capacity(cap: Int)
do
- var l = length
- var a = calloc_string(l+1)
- _items.copy_to(a, l, 0, 0)
+ assert cap >= 0
+ # _items = new NativeString.calloc(cap)
+ items = calloc_string(cap+1)
+ capacity = cap
+ length = 0
+ end
- # Ensure the afterlast byte is '\0' to nul-terminated char *
- a[length] = '\0'
+ redef fun append(s)
+ do
+ is_dirty = true
+ var sl = s.length
+ if capacity < length + sl then enlarge(length + sl)
+ if s isa FlatString then
+ s.items.copy_to(items, sl, s.index_from, length)
+ else if s isa FlatBuffer then
+ s.items.copy_to(items, sl, 0, length)
+ else
+ var curr_pos = self.length
+ for i in s.chars do
+ items[curr_pos] = i
+ curr_pos += 1
+ end
+ end
+ length += sl
+ end
- return a.to_s_with_length(length)
+ # Copies `len` chars of self, read from position `start`,
+ # into `dest`, written from position `new_start`.
+ #
+ # The chars are transferred one by one through the `chars` views of both buffers.
+ fun copy(start: Int, len: Int, dest: Buffer, new_start: Int)
+ do
+ var src = self.chars
+ var dst = dest.chars
+ var offset = 0
+ while offset < len do
+ dst[new_start + offset] = src[start + offset]
+ offset += 1
+ end
end
- redef fun <(s)
+ redef fun substring(from, count)
do
- var i = 0
- var l1 = length
- var l2 = s.length
- while i < l1 and i < l2 do
- var c1 = self.chars[i].ascii
- var c2 = s.chars[i].ascii
- if c1 < c2 then
- return true
- else if c2 < c1 then
- return false
+ assert count >= 0
+ count += from
+ if from < 0 then from = 0
+ if count > length then count = length
+ if from < count then
+ var r = new FlatBuffer.with_capacity(count - from)
+ while from < count do
+ r.chars.push(items[from])
+ from += 1
end
- i += 1
- end
- if l1 < l2 then
- return true
+ return r
else
- return false
+ return new FlatBuffer
end
end
- # Create a new empty string.
- init
+ redef fun reversed
do
- with_capacity(5)
+ var new_buf = new FlatBuffer.with_capacity(self.length)
+ var reviter = self.chars.reverse_iterator
+ while reviter.is_ok do
+ new_buf.add(reviter.item)
+ reviter.next
+ end
+ return new_buf
end
- init from(s: String)
+ redef fun +(other)
do
- _capacity = s.length + 1
- _length = s.length
- _items = calloc_string(_capacity)
- s.items.copy_to(_items, _length, s._index_from, 0)
+ var new_buf = new FlatBuffer.with_capacity(self.length + other.length)
+ new_buf.append(self)
+ new_buf.append(other)
+ return new_buf
end
- # Create a new empty string with a given capacity.
- init with_capacity(cap: Int)
+ redef fun *(repeats)
do
- assert cap >= 0
- # _items = new NativeString.calloc(cap)
- _items = calloc_string(cap+1)
- _capacity = cap
- _length = 0
+ var new_buf = new FlatBuffer.with_capacity(self.length * repeats)
+ for i in [0..repeats[ do
+ new_buf.append(self)
+ end
+ return new_buf
end
+end
- redef fun ==(o)
+private class FlatBufferReverseIterator
+ super IndexedIterator[Char]
+
+ var target: FlatBuffer
+
+ var target_items: NativeString
+
+ var curr_pos: Int
+
+ init with_pos(tgt: FlatBuffer, pos: Int)
do
- if not o isa Buffer then return false
- var l = length
- if o.length != l then return false
- var i = 0
- var it = _items
- var oit = o._items
- while i < l do
- if it[i] != oit[i] then return false
- i += 1
- end
- return true
+ target = tgt
+ target_items = tgt.items
+ curr_pos = pos
end
- readable private var _capacity: Int
+ redef fun index do return curr_pos
+
+ redef fun is_ok do return curr_pos >= 0
+
+ redef fun item do return target_items[curr_pos]
+
+ redef fun next do curr_pos -= 1
+
end
private class FlatBufferCharView
super BufferCharView
super StringCapable
- redef type SELFTYPE: Buffer
-
- init(tgt: Buffer)
- do
- self.target = tgt
- end
+ redef type SELFTYPE: FlatBuffer
- redef fun [](index) do return target._items[index]
+ redef fun [](index) do return target.items[index]
redef fun []=(index, item)
do
add(item)
return
end
- target._items[index] = item
+ target.items[index] = item
end
redef fun push(c)
if target.capacity < s.length then enlarge(s_length + target.length)
end
- redef fun iterator: IndexedIterator[Char] do return new FlatBufferIterator.with_pos(target, 0)
+ redef fun iterator_from(pos) do return new FlatBufferIterator.with_pos(target, pos)
+
+ redef fun reverse_iterator_from(pos) do return new FlatBufferReverseIterator.with_pos(target, pos)
end
private class FlatBufferIterator
super IndexedIterator[Char]
- var target: Buffer
+ var target: FlatBuffer
var target_items: NativeString
var curr_pos: Int
- init with_pos(tgt: Buffer, pos: Int)
+ init with_pos(tgt: FlatBuffer, pos: Int)
do
target = tgt
target_items = tgt.items
# assert true.to_s == "true"
# assert false.to_s == "false"
redef fun to_s
- do
- if self then
- return once "true"
- else
- return once "false"
+ do
+ # Each literal is wrapped in `once`, so it is evaluated a single time.
+ if not self then
+ return once "false"
+ else
+ return once "true"
end
- end
+ end
end
redef class Int
+
+ # Wrapper of strerror C function
+ #
+ # `self` is used as the error number. The pointer handed back by the C
+ # `strerror` is returned as is: no copy is made at this level.
+ private fun strerror_ext: NativeString is extern `{
+ return strerror(recv);
+ `}
+
+ # Returns a string describing error number
+ #
+ # The message is copied out of the memory returned by the C `strerror`:
+ # the C library is allowed to overwrite that memory on a later call, so the
+ # resulting String must not share it.
+ fun strerror: String do return strerror_ext.to_s_with_copy
+
# Fill `s` with the digits in base `base` of `self` (and with the '-' sign if 'signed' and negative).
# assume < to_c max const of char
- fun fill_buffer(s: Buffer, base: Int, signed: Bool)
+ private fun fill_buffer(s: Buffer, base: Int, signed: Bool)
do
var n: Int
# Sign
end
# Fill digits
var pos = digit_count(base) - 1
- while pos >= 0 and n > 0 do
+ while pos >= 0 and n > 0 do
s.chars[pos] = (n % base).to_c
n = n / base # /
pos -= 1
return native_int_to_s(len).to_s_with_length(len)
end
- # return displayable int in hexadecimal (unsigned (not now))
+ # Returns the representation of `self` in base 16, as built by `to_base(16, false)`.
+ #
+ #     assert 1.to_hex == "1"
+ #     assert (-255).to_hex == "-ff"
fun to_hex: String do return to_base(16,false)
# return displayable int in base base and signed
+ #
+ # A buffer of `digit_count(base)` spaces is created, then filled in place
+ # by `fill_buffer`, to which `base` and `signed` are forwarded unchanged.
fun to_base(base: Int, signed: Bool): String
do
var l = digit_count(base)
- var s = new Buffer.from(" " * l)
+ var s = new FlatBuffer.from(" " * l)
fill_buffer(s, base, signed)
return s.to_s
end
redef class Float
- # Pretty print self, print needoed decimals up to a max of 3.
+ # Pretty print self, print needed decimals up to a max of 3.
+ #
+ # assert 12.34.to_s == "12.34"
+ # assert (-0120.03450).to_s == "-120.035"
+ #
+ # see `to_precision` for a different precision.
redef fun to_s do
var str = to_precision( 3 )
+ if is_inf != 0 or is_nan then return str
var len = str.length
for i in [0..len-1] do
var j = len-1-i
end
# `self` representation with `nb` digits after the '.'.
+ #
+ # assert 12.345.to_precision(1) == "12.3"
+ # assert 12.345.to_precision(2) == "12.35"
+ # assert 12.345.to_precision(3) == "12.345"
+ # assert 12.345.to_precision(4) == "12.3450"
fun to_precision(nb: Int): String
do
+ if is_nan then return "nan"
+
+ var isinf = self.is_inf
+ if isinf == 1 then
+ return "inf"
+ else if isinf == -1 then
+ return "-inf"
+ end
+
if nb == 0 then return self.to_i.to_s
var f = self
for i in [0..nb[ do f = f * 10.0
end
end
+ # `self` representation with `nb` digits after the '.'.
+ #
+ # assert 12.345.to_precision_native(1) == "12.3"
+ # assert 12.345.to_precision_native(2) == "12.35"
+ # assert 12.345.to_precision_native(3) == "12.345"
+ # assert 12.345.to_precision_native(4) == "12.3450"
fun to_precision_native(nb: Int): String import NativeString.to_s `{
int size;
char *str;
# assert 'x'.to_s == "x"
redef fun to_s
do
- var s = new Buffer.with_capacity(1)
+ # A buffer with room for one character is enough to hold `self`
+ var s = new FlatBuffer.with_capacity(1)
s.chars[0] = self
return s.to_s
end
# Returns true if the char is a numerical digit
+ #
+ # Only the characters from '0' to '9' (both included) are accepted.
+ #
+ #     assert '0'.is_numeric
+ #     assert '9'.is_numeric
+ #     assert not 'a'.is_numeric
+ #     assert not '?'.is_numeric
fun is_numeric: Bool
do
- if self >= '0' and self <= '9'
- then
- return true
- end
- return false
+ return '0' <= self and self <= '9'
end
# Returns true if the char is an alpha digit
+ #
+ # Only the characters in the ranges 'a'..'z' and 'A'..'Z' are accepted.
+ #
+ #     assert 'a'.is_alpha
+ #     assert 'Z'.is_alpha
+ #     assert not '0'.is_alpha
+ #     assert not '?'.is_alpha
fun is_alpha: Bool
do
- if (self >= 'a' and self <= 'z') or (self >= 'A' and self <= 'Z') then return true
- return false
+ var lower = 'a' <= self and self <= 'z'
+ var upper = 'A' <= self and self <= 'Z'
+ return lower or upper
end
# Returns true if the char is an alpha or a numeric digit
+ #
+ # This is the union of `is_numeric` and `is_alpha`.
+ #
+ #     assert 'a'.is_alphanumeric
+ #     assert 'Z'.is_alphanumeric
+ #     assert '0'.is_alphanumeric
+ #     assert '9'.is_alphanumeric
+ #     assert not '?'.is_alphanumeric
fun is_alphanumeric: Bool
do
- if self.is_numeric or self.is_alpha then return true
- return false
+ if is_numeric then return true
+ return is_alpha
end
end
# Concatenate elements.
+ #
+ # Each non-null element contributes its `to_s`, in iteration order and
+ # without any separator; `null` elements are skipped.
redef fun to_s
do
- var s = new Buffer
+ var s = new FlatBuffer
for e in self do if e != null then s.append(e.to_s)
return s.to_s
end
#
# assert [1, 2, 3].join(":") == "1:2:3"
# assert [1..3].join(":") == "1:2:3"
- fun join(sep: String): String
+ fun join(sep: Text): String
do
if is_empty then return ""
-
- var s = new Buffer # Result
+
+ var s = new FlatBuffer # Result
# Concat first item
var i = iterator
var e = i.item
if e != null then s.append(e.to_s)
-
+
# Concat other items
i.next
while i.is_ok do
# Fast implementation
redef fun to_s
do
- var s = new Buffer
+ var s = new FlatBuffer
var i = 0
var l = length
while i < l do
fun join(sep: String, couple_sep: String): String
do
if is_empty then return ""
-
- var s = new Buffer # Result
+
+ var s = new FlatBuffer # Result
# Concat first item
var i = iterator
return to_s_with_length(cstring_length)
end
- fun to_s_with_length(length: Int): String
+ # Builds a `FlatString` of `length` characters directly over `self`.
+ #
+ # `self` itself is handed to the new string (no copy is made here), with
+ # the index range 0 to `length - 1`. `length` must not be negative.
+ fun to_s_with_length(length: Int): FlatString
do
assert length >= 0
- return new String.with_infos(self, length, 0, length - 1)
+ var last = length - 1
+ return new FlatString.with_infos(self, length, 0, last)
end
- fun to_s_with_copy: String
+ # Builds a `FlatString` over a fresh copy of `self`.
+ #
+ # `cstring_length` characters are copied into a newly allocated native
+ # string of `length + 1` slots, and the new string is built on that copy.
+ fun to_s_with_copy: FlatString
do
var length = cstring_length
var new_self = calloc_string(length + 1)
copy_to(new_self, length, 0, 0)
- return new String.with_infos(new_self, length, 0, length - 1)
+ var last = length - 1
+ return new FlatString.with_infos(new_self, length, 0, last)
end
end
private fun native_argv(i: Int): NativeString is intern
end
+# Comparator that efficiently uses `to_s` to compare things
+#
+# The comparison calls `to_s` on the objects and uses the results to order them.
+#
+#     var a = [1, 2, 3, 10, 20]
+#     (new CachedAlphaComparator).sort(a)
+#     assert a == [1, 10, 2, 20, 3]
+#
+# Internally, the result of `to_s` is cached in a HashMap to counter
+# inefficient implementations of `to_s`.
+#
+# Note: if caching is not useful, see `alpha_comparator`
+class CachedAlphaComparator
+	super Comparator[Object]
+
+	# `to_s` results already computed, keyed by the compared object
+	private var cache = new HashMap[Object, String]
+
+	# Textual form of `a`: read from `cache` when present, otherwise
+	# computed with `to_s` and recorded before being returned
+	private fun do_to_s(a: Object): String
+	do
+		if cache.has_key(a) then
+			return cache[a]
+		else
+			var text = a.to_s
+			cache[a] = text
+			return text
+		end
+	end
+
+	# Orders `a` and `b` according to their cached `to_s`
+	redef fun compare(a, b)
+	do
+		var left = do_to_s(a)
+		var right = do_to_s(b)
+		return left <=> right
+	end
+end
+
+# Comparator behind `alpha_comparator`; see that function for the documentation
+private class AlphaComparator
+	super Comparator[Object]
+
+	# Orders `a` and `b` according to their `to_s`, recomputed at each call
+	redef fun compare(a, b)
+	do
+		var left = a.to_s
+		var right = b.to_s
+		return left <=> right
+	end
+end
+
+# Stateless comparator that naively uses `to_s` to compare things.
+#
+# Note: the result of `to_s` is not cached, thus `to_s` can be invoked a lot
+# on a single instance. See `CachedAlphaComparator` as an alternative.
+#
+#     var a = [1, 2, 3, 10, 20]
+#     alpha_comparator.sort(a)
+#     assert a == [1, 10, 2, 20, 3]
+fun alpha_comparator: Comparator[Object]
+do
+	return once new AlphaComparator
+end