+# High-level abstraction for all text representations
+abstract class Text
+ super Comparable
+ super StringCapable
+
+ redef type OTHER: Text
+
+ # Type of the view on self (.chars)
+ type SELFVIEW: StringCharView
+
+ # Type of self (used for factorization of several methods, ex : substring_from, empty...)
+ type SELFTYPE: Text
+
+ # Memoized result of `hash`: stays `null` until `hash` is first computed
+ var hash_cache: nullable Int = null
+
+ # Gets a view on the chars of the Text object
+ #
+ # The deprecated accessors of this class (`first`, `[]`, `iterator`, `has`, ...)
+ # all delegate to this view.
+ fun chars: SELFVIEW is abstract
+
+ # Number of characters contained in self.
+ #
+ # `is_empty` is defined as `length == 0`.
+ fun length: Int is abstract
+
+ # Create a substring of at most `count` chars starting at index `from`.
+ #
+ #     assert "abcd".substring(1, 2) == "bc"
+ #     assert "abcd".substring(-1, 2) == "a"
+ #     assert "abcd".substring(1, 0) == ""
+ #     assert "abcd".substring(2, 5) == "cd"
+ #
+ # A `from` index < 0 is replaced by 0, and `count` is shortened by the
+ # number of skipped positions (never below 0): `substring(-1, 2)` covers
+ # the indexes -1 and 0, so only the char at index 0 is returned.
+ # A `count` reaching past the end is truncated to the available chars.
+ # (Described from the examples above and from the FlatString implementation.)
+ fun substring(from: Int, count: Int): SELFTYPE is abstract
+
+ # Concatenates `o` to `self`
+ #
+ # The result is a new text; see the concrete subclasses for examples.
+ fun +(o: Text): SELFTYPE is abstract
+
+ # Auto-concatenates self `i` times
+ #
+ # NOTE(review): the FlatString implementation asserts `i >= 0`; other
+ # subclasses presumably do the same -- confirm.
+ fun *(i: Int): SELFTYPE is abstract
+
+ # Does `self` contain no character at all (i.e. is it equal to "")?
+ #
+ #     assert "".is_empty
+ #     assert not "foo".is_empty
+ fun is_empty: Bool
+ do
+     return length == 0
+ end
+
+ # Returns an empty Text of the right type
+ #
+ # Used by `substring_from`, `l_trim` and `r_trim` when nothing remains.
+ fun empty: SELFTYPE is abstract
+
+ # Gets the char at index 0 of the Text
+ #
+ # DEPRECATED : Use self.chars.first instead
+ fun first: Char
+ do
+     return chars[0]
+ end
+
+ # Gets the character located at `index` in the string.
+ #
+ #     assert "abcd"[2] == 'c'
+ #
+ # DEPRECATED : Use self.chars.[] instead
+ fun [](index: Int): Char
+ do
+     return chars[index]
+ end
+
+ # Gets the index of the first occurrence of `c`
+ #
+ # Returns -1 if not found
+ #
+ # DEPRECATED : Use self.chars.index_of instead
+ fun index_of(c: Char): Int do return index_of_from(c, 0)
+
+ # Gets the char at index `length - 1` of self
+ #
+ # DEPRECATED : Use self.chars.last instead
+ fun last: Char
+ do
+     var last_index = length - 1
+     return chars[last_index]
+ end
+
+ # Gets the index of the first occurrence of `c`, searching forward from `pos`
+ #
+ #     assert "abcabc".index_of_from('c', 0) == 2
+ #     assert "abcabc".index_of_from('c', 3) == 5
+ #     assert "abcabc".index_of_from('z', 0) == -1
+ #
+ # Returns -1 if not found
+ #
+ # DEPRECATED : Use self.chars.index_of_from instead
+ fun index_of_from(c: Char, pos: Int): Int
+ do
+     var iter = self.chars.iterator_from(pos)
+     while iter.is_ok do
+         if iter.item == c then return iter.index
+         # Advance to the next char; without this the loop never ends
+         # as soon as the char at `pos` is not `c`
+         iter.next
+     end
+     return -1
+ end
+
+ # Gets the index of the last occurrence of `c`
+ #
+ # Returns -1 if not found
+ #
+ # DEPRECATED : Use self.chars.last_index_of instead
+ fun last_index_of(c: Char): Int do return last_index_of_from(c, length - 1)
+
+ # Return a null terminated char *
+ #
+ # The work is delegated to the flat representation of self (see `flatten`).
+ fun to_cstring: NativeString
+ do
+     var flat = flatten
+     return flat.to_cstring
+ end
+
+ # The index of the last occurrence of `item`, searching backward from `pos`.
+ #
+ #     assert "/etc/bin/test/test.nit".last_index_of_from('/', 21) == 13
+ #     assert "/etc/bin/test/test.nit".last_index_of_from('/', 12) == 8
+ #
+ # Returns -1 if not found
+ #
+ # DEPRECATED : Use self.chars.last_index_of_from instead
+ fun last_index_of_from(item: Char, pos: Int): Int
+ do
+     var cursor = chars.reverse_iterator_from(pos)
+     # Walk backward until the char is found or the iterator is exhausted
+     while cursor.is_ok and cursor.item != item do
+         cursor.next
+     end
+     if cursor.is_ok then return cursor.index
+     return -1
+ end
+
+ # Gets an iterator on the chars of self, as provided by the `chars` view
+ #
+ # DEPRECATED : Use self.chars.iterator instead
+ fun iterator: Iterator[Char] do return chars.iterator
+
+ # Does `c` appear at least once in self?
+ #
+ # DEPRECATED : Use self.chars.has instead
+ fun has(c: Char): Bool do return chars.has(c)
+
+ # Gets an Array holding the chars of self, built from the `chars` view
+ #
+ # DEPRECATED : Use self.chars.to_a instead
+ fun to_a: Array[Char]
+ do
+     return self.chars.to_a
+ end
+
+ # Create a substring made of everything from position `from` to the end
+ #
+ #     assert "abcd".substring_from(1) == "bcd"
+ #     assert "abcd".substring_from(-1) == "abcd"
+ #     assert "abcd".substring_from(2) == "cd"
+ #
+ # As with substring, a `from` index < 0 will be replaced by 0
+ fun substring_from(from: Int): SELFTYPE
+ do
+     if from > length then return empty
+     # Work on a clamped copy of the argument
+     var start = from
+     if start < 0 then start = 0
+     return substring(start, length - start)
+ end
+
+ # Returns a reversed version of self
+ #
+ # Self is left untouched, the reversed text is a new object
+ # (at least in the FlatString implementation).
+ fun reversed: SELFTYPE is abstract
+
+ # Does self have a substring `str` starting from position `pos`?
+ #
+ #     assert "abcd".has_substring("bc",1) == true
+ #     assert "abcd".has_substring("bc",2) == false
+ #     assert "abcd".has_substring("cde",2) == false
+ #     assert "abcd".has_substring("ab",-1) == false
+ #
+ # An empty `str` is found at any position.
+ # A `pos` that is negative, or that leaves too little room for `str`, yields false.
+ fun has_substring(str: String, pos: Int): Bool
+ do
+     if str.is_empty then return true
+     # Reject positions the char view cannot serve (negative index, or `str` overflowing self)
+     if pos < 0 or pos + str.length > length then return false
+     var myiter = self.chars.iterator_from(pos)
+     var itsiter = str.chars.iterator
+     while myiter.is_ok and itsiter.is_ok do
+         if myiter.item != itsiter.item then return false
+         myiter.next
+         itsiter.next
+     end
+     # It is a match only if the whole of `str` was consumed
+     return not itsiter.is_ok
+ end
+
+ # Does this string start with `prefix`?
+ #
+ #     assert "abcd".has_prefix("ab") == true
+ #     assert "abcbc".has_prefix("bc") == false
+ #     assert "ab".has_prefix("abcd") == false
+ fun has_prefix(prefix: String): Bool
+ do
+     return has_substring(prefix, 0)
+ end
+
+ # Is this string suffixed by `suffix`?
+ #
+ #     assert "abcd".has_suffix("abc") == false
+ #     assert "abcd".has_suffix("bcd") == true
+ #     assert "cd".has_suffix("abcd") == false
+ fun has_suffix(suffix: String): Bool
+ do
+     var start = length - suffix.length
+     # A suffix longer than self cannot match, and must not be searched at a negative position
+     if start < 0 then return false
+     return has_substring(suffix, start)
+ end
+
+ # If `self` contains only digits, return the corresponding integer
+ #
+ #     assert "123".to_i == 123
+ #     assert "-1".to_i == -1
+ #
+ # The conversion is done by `atoi` on the C string of `to_s`.
+ fun to_i: Int do return to_s.to_cstring.atoi
+
+ # If `self` contains a float, return the corresponding float
+ #
+ #     assert "123".to_f == 123.0
+ #     assert "-1".to_f == -1.0
+ #     assert "-1.2e-3".to_f == -0.0012
+ #
+ # The conversion is done by `atof` on the C string of `to_s`.
+ fun to_f: Float do return to_s.to_cstring.atof
+
+ # If `self` contains only digits and alpha <= 'f', return the corresponding integer.
+ #
+ # Shortcut for `a_to(16)`.
+ fun to_hex: Int
+ do
+     return a_to(16)
+ end
+
+ # If `self` contains only digits and letters, return the corresponding integer in a given base
+ #
+ #     assert "120".a_to(3) == 15
+ #     assert "12".a_to(2) == 1
+ #
+ # Parsing stops at the first char whose digit value is not strictly below `base`;
+ # what was read so far is returned.
+ # A char with a negative digit value makes the result negative
+ # (presumably the '-' sign -- depends on `Char::to_i`, not visible here).
+ fun a_to(base: Int) : Int
+ do
+     var i = 0
+     var neg = false
+
+     for c in self.chars do
+         var v = c.to_i
+         # Valid digits of a base range from 0 to base - 1, so `base` itself is rejected too
+         if v >= base then break
+         if v < 0 then
+             neg = true
+         else
+             i = i * base + v
+         end
+     end
+     if neg then return -i
+     return i
+ end
+
+ # Is the string made only of numeric chars, with at most one "," or "." separator?
+ #
+ #     assert "123".is_numeric == true
+ #     assert "1.2".is_numeric == true
+ #     assert "1,2".is_numeric == true
+ #     assert "1..2".is_numeric == false
+ fun is_numeric: Bool
+ do
+     var separator_seen = false
+     for c in chars do
+         if c.is_numeric then continue
+         # Only a single decimal separator is tolerated among the digits
+         if c != '.' and c != ',' then return false
+         if separator_seen then return false
+         separator_seen = true
+     end
+     return true
+ end
+
+ # An upper case version of `self`
+ #
+ # assert "Hello World!".to_upper == "HELLO WORLD!"
+ fun to_upper: SELFTYPE is abstract
+
+ # A lower case version of `self`
+ #
+ # Counterpart of `to_upper`.
+ #
+ # assert "Hello World!".to_lower == "hello world!"
+ fun to_lower : SELFTYPE is abstract
+
+ # Removes the whitespaces at the beginning of self
+ #
+ # A whitespace is any char whose ascii value is <= 32.
+ fun l_trim: SELFTYPE
+ do
+     var cursor = chars.iterator
+     # Skip the leading whitespaces
+     while cursor.is_ok and cursor.item.ascii <= 32 do
+         cursor.next
+     end
+     var start = cursor.index
+     if start == length then return empty
+     return substring_from(start)
+ end
+
+ # Removes the whitespaces at the end of self
+ #
+ #     assert "ab  ".r_trim == "ab"
+ #     assert "   ".r_trim == ""
+ #
+ # A whitespace is any char whose ascii value is <= 32.
+ fun r_trim: SELFTYPE
+ do
+     var iter = self.chars.reverse_iterator
+     while iter.is_ok do
+         if iter.item.ascii > 32 then break
+         iter.next
+     end
+     # No char to keep: the backward scan ran out of chars, or it ended before index 0.
+     # (Testing `index == length`, as a forward scan would, never matches here.)
+     if not iter.is_ok or iter.index < 0 then return self.empty
+     return self.substring(0, iter.index + 1)
+ end
+
+ # Removes both the leading and the trailing white spaces
+ # A whitespace is defined as any character which ascii value is less than or equal to 32
+ #
+ #     assert "  Hello  World !  ".trim == "Hello  World !"
+ #     assert "\na\nb\tc\t".trim == "a\nb\tc"
+ fun trim: SELFTYPE
+ do
+     var without_leading = l_trim
+     return without_leading.r_trim
+ end
+
+ # Mangle a string to be a unique string only made of alphanumeric characters
+ #
+ # Letters and digits are kept, a `_` is kept too, and any other char is
+ # written as `_`, its ascii code and `d`. A non-letter coming right after
+ # a `_` is preceded by the ascii code of `_` and `d`.
+ #
+ #     assert "a_b".to_cmangle == "a_b"
+ #     assert "a.b".to_cmangle == "a_46db"
+ fun to_cmangle: String
+ do
+     var mangled = new FlatBuffer
+     var after_underscore = false
+     for c in chars do
+         var is_letter = (c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z')
+         if not is_letter and after_underscore then
+             # Disambiguate the previous `_` from the `_` that introduces an escaped char
+             mangled.append('_'.ascii.to_s)
+             mangled.add('d')
+         end
+         if is_letter or (c >= '0' and c <= '9') then
+             mangled.add(c)
+             after_underscore = false
+         else if c == '_' then
+             mangled.add(c)
+             after_underscore = true
+         else
+             mangled.add('_')
+             mangled.append(c.ascii.to_s)
+             mangled.add('d')
+             after_underscore = false
+         end
+     end
+     return mangled.to_s
+ end
+
+ # Escape " \ ' and non printable characters using the rules of literal C strings and characters
+ #
+ #     assert "abAB12<>&".escape_to_c == "abAB12<>&"
+ #     assert "\n\"'\\".escape_to_c == "\\n\\\"\\'\\\\"
+ fun escape_to_c: String
+ do
+     var out = new FlatBuffer
+     for c in chars do
+         if c == '\\' then
+             out.append("\\\\")
+         else if c == '"' then
+             out.append("\\\"")
+         else if c == '\'' then
+             out.append("\\\'")
+         else if c == '\n' then
+             out.append("\\n")
+         else if c == '\0' then
+             out.append("\\0")
+         else if c.ascii < 32 then
+             # The remaining control chars are written as a backslash and their octal code
+             out.append("\\{c.ascii.to_base(8, false)}")
+         else
+             out.add(c)
+         end
+     end
+     return out.to_s
+ end
+
+ # Escape additional characters, on top of what `escape_to_c` already escapes
+ # The result might not be legal in C but can be used in other languages
+ #
+ #     assert "ab|\{\}".escape_more_to_c("|\{\}") == "ab\\|\\\{\\\}"
+ fun escape_more_to_c(chars: String): String
+ do
+     var out = new FlatBuffer
+     var c_escaped = escape_to_c
+     for c in c_escaped do
+         # Each char listed in `chars` gets a backslash in front of it
+         if chars.chars.has(c) then out.add('\\')
+         out.add(c)
+     end
+     return out.to_s
+ end
+
+ # Escape like `escape_to_c` does, plus the braces
+ #
+ #     assert "\n\"'\\\{\}".escape_to_nit == "\\n\\\"\\'\\\\\\\{\\\}"
+ fun escape_to_nit: String
+ do
+     return escape_more_to_c("\{\}")
+ end
+
+ # Return a string where Nit escape sequences are transformed.
+ #
+ # `\n`, `\r`, `\t` and `\0` become the matching control char; any other
+ # escaped char is kept without its backslash.
+ #
+ # Example:
+ #
+ #     var s = "\\n"
+ #     assert s.length == 2
+ #     var u = s.unescape_nit
+ #     assert u.length == 1
+ #     assert u[0].ascii == 10 # (the ASCII value of the "new line" character)
+ fun unescape_nit: String
+ do
+     var out = new FlatBuffer.with_capacity(length)
+     var escaped = false
+     for c in self do
+         if escaped then
+             # `c` directly follows a backslash
+             escaped = false
+             if c == 'n' then
+                 out.add('\n')
+             else if c == 'r' then
+                 out.add('\r')
+             else if c == 't' then
+                 out.add('\t')
+             else if c == '0' then
+                 out.add('\0')
+             else
+                 out.add(c)
+             end
+         else if c == '\\' then
+             escaped = true
+         else
+             out.add(c)
+         end
+     end
+     return out.to_s
+ end
+
+ # Two texts are equal when they hold the same chars in the same order,
+ # whatever their concrete classes are
+ redef fun ==(o)
+ do
+     if o == null then return false
+     if not o isa Text then return false
+     # Same object, or same size and same content
+     return is_same_instance(o) or (length == o.length and chars == o.chars)
+ end
+
+ # Texts are ordered the way their `chars` views are
+ redef fun <(o) do return chars < o.chars
+
+ # Flat representation of self
+ #
+ # `to_cstring` relies on it; a FlatText simply returns itself.
+ fun flatten: FlatText is abstract
+
+ # Hash of the chars of self, computed once then served from `hash_cache`
+ redef fun hash
+ do
+     if hash_cache == null then
+         # djb2 hash algorithm: h = h * 33 + c, folded over the chars from first to last
+         var h = 5381
+
+         for char in self.chars do
+             h = (h * 32) + h + char.ascii
+         end
+
+         hash_cache = h
+     end
+     return hash_cache.as(not null)
+ end
+
+end
+
+ # All kinds of array-based text representations.
+ abstract class FlatText
+     super Text
+
+     # Native array holding the chars of the text
+     private var items: NativeString
+
+     # Null-terminated copy of the chars, kept as a cache once `to_cstring` had to build one
+     private var real_items: nullable NativeString = null
+
+     redef var length: Int
+
+     init do end
+
+     # Outputs the first `length` chars of `items`, one after the other
+     redef fun output
+     do
+         for i in [0..length[ do
+             items[i].output
+         end
+     end
+
+     # A flat text is its own flat representation
+     redef fun flatten
+     do
+         return self
+     end
+ end
+
+ # Abstract class for the SequenceRead compatible
+ # views on String and Buffer objects
+ abstract class StringCharView
+     super SequenceRead[Char]
+     super Comparable
+
+     # Type of the viewed text
+     type SELFTYPE: Text
+
+     redef type OTHER: StringCharView
+
+     # The text whose chars are exposed by this view
+     private var target: SELFTYPE
+
+     private init(tgt: SELFTYPE)
+     do
+         target = tgt
+     end
+
+     redef fun is_empty do return target.is_empty
+
+     redef fun length do return target.length
+
+     redef fun iterator: IndexedIterator[Char] do return self.iterator_from(0)
+
+     # Gets a new Iterator starting at position `pos`
+     #
+     # Ex :
+     #
+     #     var iter = "abcd".chars.iterator_from(2)
+     #     while iter.is_ok do
+     #         printn iter.item
+     #         iter.next
+     #     end
+     #
+     # Outputs : cd
+     fun iterator_from(pos: Int): IndexedIterator[Char] is abstract
+
+     # Gets an iterator starting at the end and going backwards
+     #
+     # Ex :
+     #
+     #     var reviter = "now step live...".chars.reverse_iterator
+     #     while reviter.is_ok do
+     #         printn reviter.item
+     #         reviter.next
+     #     end
+     #
+     # Outputs : ...evil pets won
+     fun reverse_iterator: IndexedIterator[Char] do return self.reverse_iterator_from(self.length - 1)
+
+     # Gets an iterator going backwards on the chars of self, starting from `pos`
+     #
+     # Ex :
+     #
+     #     var iter = "abcd".chars.reverse_iterator_from(1)
+     #     while iter.is_ok do
+     #         printn iter.item
+     #         iter.next
+     #     end
+     #
+     # Outputs : ba
+     fun reverse_iterator_from(pos: Int): IndexedIterator[Char] is abstract
+
+     redef fun has(c: Char): Bool
+     do
+         for i in self do
+             if i == c then return true
+         end
+         return false
+     end
+
+     # Two views are equal when they expose the same chars in the same order
+     #
+     #     assert "abc".chars == "abc".chars
+     #     assert not ("abc".chars == "abcd".chars)
+     redef fun ==(other)
+     do
+         if other == null then return false
+         if not other isa StringCharView then return false
+         # Views of different sizes cannot be equal; checking it first also
+         # guarantees that `other_chars` is never read past its end below
+         if length != other.length then return false
+         var other_chars = other.iterator
+         for i in self do
+             if i != other_chars.item then return false
+             other_chars.next
+         end
+         return true
+     end
+
+     # Strict lexicographical order on the chars
+     #
+     #     assert "ab".chars < "abc".chars
+     #     assert not ("abc".chars < "abc".chars)
+     redef fun <(other)
+     do
+         var self_chars = self.iterator
+         var other_chars = other.iterator
+
+         while self_chars.is_ok and other_chars.is_ok do
+             if self_chars.item < other_chars.item then return true
+             if self_chars.item > other_chars.item then return false
+             self_chars.next
+             other_chars.next
+         end
+
+         # All the compared chars are equal: self is smaller only if `other`
+         # still has chars left, i.e. if self is a strict prefix of `other`
+         return other_chars.is_ok
+     end
+ end
+
+ # View on Buffer objects, extends Sequence
+ # for mutation operations
+ abstract class BufferCharView
+ super StringCharView
+ super Sequence[Char]
+
+ # The viewed text is a Buffer (narrows the `Text` bound declared in StringCharView)
+ redef type SELFTYPE: Buffer
+
+ end
+
+ # Abstract text that is its own string representation
+ #
+ # `FlatString` is the concrete implementation found in this file.
+ abstract class String
+     super Text
+
+     redef type SELFTYPE: String
+
+     # A String is already a string: no conversion is needed
+     redef fun to_s
+     do
+         return self
+     end
+
+ end
+
+# Immutable strings of characters.
+class FlatString
+ super FlatText
+ super String
+
+ redef type SELFTYPE: FlatString
+ redef type SELFVIEW: FlatStringCharView
+
+ # Index in _items of the start of the string
+ private var index_from: Int
+
+ # Index in _items of the last item of the string (inclusive)
+ private var index_to: Int
+
+ # View on the chars of this string
+ redef var chars: SELFVIEW = new FlatStringCharView(self)
+
+ ################################################
+ # AbstractString specific methods #
+ ################################################
+
+ # Char at `index`, counted from the start of this string (not of the backing array)
+ redef fun [](index)
+ do
+     assert index >= 0
+     # Position of the requested char in the backing array;
+     # it is valid only if it does not go past the last char of the string
+     var real_index = index + index_from
+     assert real_index <= index_to
+     return items[real_index]
+ end
+
+ # A new string holding the chars of self in reverse order
+ #
+ #     assert "abc".reversed == "cba"
+ redef fun reversed
+ do
+     var len = self.length
+     var native = calloc_string(len + 1)
+     var reviter = chars.reverse_iterator
+     var pos = 0
+     # Never copy more than `len` chars: the buffer has no room for more, even if
+     # the iterator were able to keep going below the first char of this string
+     while reviter.is_ok and pos < len do
+         native[pos] = reviter.item
+         pos += 1
+         reviter.next
+     end
+     # Terminate explicitly, as the other string builders of this class do
+     native[len] = '\0'
+     return native.to_s_with_length(len)
+ end
+
+ # Substring sharing the backing array of self (no char is copied)
+ #
+ #     assert "abcd".substring(1, 2) == "bc"
+ #     assert "abcd".substring(-1, 2) == "a"
+ #     assert "abcd".substring(2, 5) == "cd"
+ #     assert "abcd".substring(10, 2) == ""
+ #
+ # `count` must be >= 0. A negative `from` is replaced by 0 and shortens
+ # `count` accordingly; a range going past the end is truncated.
+ redef fun substring(from, count)
+ do
+     assert count >= 0
+
+     if from < 0 then
+         count += from
+         if count < 0 then count = 0
+         from = 0
+     end
+
+     var realFrom = index_from + from
+
+     if (realFrom + count) > index_to then
+         # The range runs up to (or past) the end: keep what is available.
+         # Nothing is available when `from` lies beyond the last char; building
+         # a string anyway would give it a negative length
+         var new_len = index_to - realFrom + 1
+         if new_len <= 0 then return empty
+         return new FlatString.with_infos(items, new_len, realFrom, index_to)
+     end
+
+     if count == 0 then return empty
+
+     var to = realFrom + count - 1
+
+     return new FlatString.with_infos(items, count, realFrom, to)
+ end
+
+ # The empty string literal, seen as a FlatString
+ redef fun empty
+ do
+     return "".as(FlatString)
+ end
+
+ # New string where each char of self is replaced by its upper case version
+ redef fun to_upper
+ do
+     var len = length
+     var upper = calloc_string(len + 1)
+     var source = items
+     var first = index_from
+
+     # Walk the slice of the backing array owned by this string
+     for src_pos in [first .. index_to] do
+         upper[src_pos - first] = source[src_pos].to_upper
+     end
+
+     upper[len] = '\0'
+     return upper.to_s_with_length(len)
+ end
+
+ # New string where each char of self is replaced by its lower case version
+ redef fun to_lower
+ do
+     var len = length
+     var lower = calloc_string(len + 1)
+     var source = items
+     var first = index_from
+
+     # Walk the slice of the backing array owned by this string
+     for src_pos in [first .. index_to] do
+         lower[src_pos - first] = source[src_pos].to_lower
+     end
+
+     lower[len] = '\0'
+     return lower.to_s_with_length(len)
+ end
+
+ # Outputs the chars of the string, from `index_from` to `index_to` included
+ redef fun output
+ do
+     for pos in [index_from .. index_to] do
+         items[pos].output
+     end
+ end
+
+ ##################################################
+ # String Specific Methods #
+ ##################################################
+
+ # Builds a string of `len` chars over the slice `from`..`to` (included) of `items`
+ #
+ # The array is used as is, it is not copied.
+ private init with_infos(items: NativeString, len: Int, from: Int, to: Int)
+ do
+     index_from = from
+     index_to = to
+     length = len
+     self.items = items
+ end
+
+ # Return a null terminated char *
+ #
+ # The backing array is returned directly when the string covers all of it.
+ # Otherwise a terminated copy is built, cached in `real_items` and returned.
+ redef fun to_cstring: NativeString
+ do
+     if real_items != null then return real_items.as(not null)
+
+     # Does the string start at the beginning of the array and stop at its last char?
+     var covers_whole_array = index_from <= 0 and index_to == items.cstring_length - 1
+     if covers_whole_array then return items
+
+     var copy = calloc_string(length + 1)
+     items.copy_to(copy, length, index_from, 0)
+     copy[length] = '\0'
+     real_items = copy
+     return copy
+ end
+
+ # Char by char comparison done directly on the backing arrays
+ #
+ # Anything that is not a FlatString is handed over to the inherited comparison.
+ redef fun ==(other)
+ do
+     if not other isa FlatString then return super
+
+     if self.object_id == other.object_id then return true
+
+     var len = length
+     if other.length != len then return false
+
+     var mine = items
+     var theirs = other.items
+     var my_start = index_from
+     var its_start = other.index_from
+
+     # Both strings have `len` chars, each starting at its own offset
+     for offset in [0..len[ do
+         if mine[my_start + offset] != theirs[its_start + offset] then return false
+     end
+
+     return true
+ end
+
+ # The comparison between two strings is done on a lexicographical basis
+ #
+ #     assert ("aa" < "b") == true
+ #     assert ("ab" < "abc") == true
+ #     assert ("abc" < "ab") == false
+ #
+ # When one string is a prefix of the other, the shorter one is the smaller.
+ redef fun <(other)
+ do
+     if not other isa FlatString then return super
+
+     if self.object_id == other.object_id then return false
+
+     var my_items = self.items
+     var its_items = other.items
+
+     var my_length = self.length
+     var its_length = other.length
+
+     # Only the positions present in both strings can be compared: going further
+     # would read chars of the backing array that do not belong to the shorter one
+     var common_length = my_length
+     if its_length < common_length then common_length = its_length
+
+     var curr_id_self = self.index_from
+     var curr_id_other = other.index_from
+     var max_iterations = curr_id_self + common_length
+
+     while curr_id_self < max_iterations do
+         var my_curr_char = my_items[curr_id_self]
+         var its_curr_char = its_items[curr_id_other]
+
+         if my_curr_char != its_curr_char then return my_curr_char < its_curr_char
+
+         curr_id_self += 1
+         curr_id_other += 1
+     end
+
+     return my_length < its_length
+ end
+
+ # The concatenation of `self` with `s`, as a new string
+ #
+ #     assert "hello " + "world!" == "hello world!"
+ redef fun +(s)
+ do
+     var own_length = length
+     var added_length = s.length
+     var total_length = own_length + added_length
+
+     var joined = calloc_string(total_length + 1)
+     joined[total_length] = '\0'
+
+     # Chars of self first
+     items.copy_to(joined, own_length, index_from, 0)
+
+     # Then the chars of `s`: block copy for the flat classes, char by char otherwise
+     if s isa FlatString then
+         s.items.copy_to(joined, added_length, s.index_from, own_length)
+     else if s isa FlatBuffer then
+         s.items.copy_to(joined, added_length, 0, own_length)
+     else
+         var write_pos = own_length
+         for c in s.chars do
+             joined[write_pos] = c
+             write_pos += 1
+         end
+     end
+
+     return joined.to_s_with_length(total_length)
+ end
+
+ # A new string made of `i` copies of self put end to end
+ #
+ #     assert "abc"*3 == "abcabcabc"
+ #     assert "abc"*1 == "abc"
+ #     assert "abc"*0 == ""
+ #
+ # `i` must be >= 0.
+ redef fun *(i)
+ do
+     assert i >= 0
+
+     var my_length = self.length
+
+     var final_length = my_length * i
+
+     var my_items = self.items
+
+     var target_string = calloc_string((final_length) + 1)
+
+     target_string[final_length] = '\0'
+
+     var current_last = 0
+
+     for iteration in [1 .. i] do
+         # Read from `index_from`: the chars of a substring do not start
+         # at the beginning of the backing array
+         my_items.copy_to(target_string, my_length, index_from, current_last)
+         current_last += my_length
+     end
+
+     return target_string.to_s_with_length(final_length)
+ end
+
+ # Hash of the chars of the string, computed once then served from `hash_cache`
+ #
+ # The chars are folded from first to last, exactly like the generic
+ # implementation in `Text` does: texts that are equal must have the same
+ # hash, whatever their concrete classes are.
+ redef fun hash
+ do
+     if hash_cache == null then
+         # djb2 hash algorithm: h = h * 33 + c
+         var h = 5381
+
+         var myitems = items
+         var i = index_from
+         var stop = i + length
+
+         while i < stop do
+             h = (h * 32) + h + myitems[i].ascii
+             i += 1
+         end
+
+         hash_cache = h
+     end
+
+     return hash_cache.as(not null)
+ end
+end
+
+ # Iterator going backward on the chars of a FlatString
+ private class FlatStringReverseIterator
+     super IndexedIterator[Char]
+
+     # The iterated string
+     var target: FlatString
+
+     # Backing array of `target`
+     var target_items: NativeString
+
+     # Current position, as an index in `target_items`
+     var curr_pos: Int
+
+     # Starts at the char of index `pos` in `tgt`
+     init with_pos(tgt: FlatString, pos: Int)
+     do
+         target = tgt
+         target_items = tgt.items
+         curr_pos = pos + tgt.index_from
+     end
+
+     # The iteration stops before the first char of the string, which is not
+     # always the first char of the array: a substring shares the array of the
+     # string it comes from and may start anywhere in it
+     redef fun is_ok do return curr_pos >= target.index_from
+
+     redef fun item do return target_items[curr_pos]
+
+     redef fun next do curr_pos -= 1
+
+     # Index relative to the start of the string
+     redef fun index do return curr_pos - target.index_from
+
+ end
+
+private class FlatStringIterator
+ super IndexedIterator[Char]
+
+ var target: FlatString
+
+ var target_items: NativeString
+
+ var curr_pos: Int
+
+ init with_pos(tgt: FlatString, pos: Int)
+ do
+ target = tgt
+ target_items = tgt.items
+ curr_pos = pos + target.index_from
+ end
+
+ redef fun is_ok do return curr_pos <= target.index_to