import collection
intrude import collection::array
+in "C" `{
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+`}
+
# High-level abstraction for all text representations
abstract class Text
super Comparable
# assert "hello".chars.to_a == ['h', 'e', 'l', 'l', 'o']
fun chars: SequenceRead[Char] is abstract
+ # Gets a view on the bytes of the Text object
+ #
+ # assert "hello".bytes.to_a == [104u8, 101u8, 108u8, 108u8, 111u8]
+ fun bytes: SequenceRead[Byte] is abstract
+
# Number of characters contained in self.
#
# assert "12345".length == 5
# assert "".length == 0
+ # assert "あいうえお".length == 5
fun length: Int is abstract
+ # Number of bytes in `self`
+ #
+ # assert "12345".bytelen == 5
+ # assert "あいうえお".bytelen == 15
+ fun bytelen: Int is abstract
+
# Create a substring.
#
# assert "abcd".substring(1, 2) == "bc"
# assert "abcd".substring(-1, 2) == "a"
# assert "abcd".substring(1, 0) == ""
# assert "abcd".substring(2, 5) == "cd"
+ # assert "あいうえお".substring(1,3) == "いうえ"
#
# A `from` index < 0 will be replaced by 0.
# Unless a `count` value is > 0 at the same time.
# assert "abcd".has_suffix("bcd") == true
fun has_suffix(suffix: String): Bool
do
	# The suffix, if present, starts `suffix.length` characters before the end
	var start = length - suffix.length
	return has_substring(suffix, start)
end
- # If `self` contains only digits, return the corresponding integer
+ # Returns a copy of `self` minus all occurrences of `c`
+ #
+ # assert "__init__".remove_all('_') == "init"
+	fun remove_all(c: Char): String do
+		# Keep, in order, every character that differs from `c`
+		var kept = new Buffer
+		for ch in chars do
+			if ch == c then continue
+			kept.add ch
+		end
+		return kept.to_s
+	end
+
+ # Is `self` a well-formed Integer (i.e. parsable via `to_i`)
+ #
+ # assert "123".is_int
+ # assert "0b1011".is_int
+ # assert not "0x_".is_int
+ # assert not "0xGE".is_int
+	fun is_int: Bool do
+		# Underscores are digit separators and never carry a value
+		var s = remove_all('_')
+		# Skip the run of leading minus signs. The bound check keeps an
+		# empty or dash-only text from being indexed past its end.
+		var pos = 0
+		var len = s.length
+		while pos < len and s[pos] == '-' do
+			pos += 1
+		end
+		s = s.substring_from(pos)
+		var rets = s.strip_numhead
+		# Nothing left to parse (e.g. "", "-", "0x", "0x_")
+		if rets == "" then return false
+		# The base prefix must be read from the sign-stripped text:
+		# on `self` a leading '-' would hide it.
+		var hd = s.get_numhead
+		if hd == "0x" or hd == "0X" then return rets.is_hex
+		if hd == "0b" or hd == "0B" then return rets.is_bin
+		if hd == "0o" or hd == "0O" then return rets.is_oct
+		# No prefix: the remaining characters themselves must be decimal digits
+		return rets.is_dec
+	end
+
+ # Removes the numeric head of `self` if present
+ #
+ # intrude import standard::text::abstract_text
+ # assert "0xFFEF".strip_numhead == "FFEF"
+ # assert "0o7364".strip_numhead == "7364"
+ # assert "0b01001".strip_numhead == "01001"
+ # assert "98".strip_numhead == "98"
+	private fun strip_numhead: Text do
+		# Without a base prefix there is nothing to strip
+		if get_numhead == "" then return self
+		# Every prefix returned by `get_numhead` is two characters long
+		return substring_from(2)
+	end
+
+ # Gets the numeric head of `self` if present
+ # Returns "" otherwise
+ #
+ # intrude import standard::text::abstract_text
+ # assert "0xFEFF".get_numhead == "0x"
+ # assert "0b01001".get_numhead == "0b"
+ # assert "0o872".get_numhead == "0o"
+ # assert "98".get_numhead == ""
+	private fun get_numhead: Text do
+		# A prefix needs two characters: a leading '0' and a base letter
+		if length < 2 then return ""
+		if self[0] != '0' then return ""
+		var base = self[1]
+		if base == 'x' or base == 'X' then return substring(0, 2)
+		if base == 'b' or base == 'B' then return substring(0, 2)
+		if base == 'o' or base == 'O' then return substring(0, 2)
+		return ""
+	end
+
+ # Removes the numeric extension if present
+ #
+ # intrude import standard::text::abstract_text
+ # assert "0xFEFFu8".strip_numext == "0xFEFF"
+ # assert "0b01001u8".strip_numext == "0b01001"
+ # assert "0o872u8".strip_numext == "0o872"
+ # assert "98".strip_numext == "98"
+	private fun strip_numext: Text do
+		var ext_len = get_numext.length
+		# No extension found: `self` is returned untouched
+		if ext_len == 0 then return self
+		return substring(0, length - ext_len)
+	end
+
+ # Gets the numeric extension (i/u 8/16/32) of `self` if present
+ # Returns "" otherwise
+ #
+ # intrude import standard::text::abstract_text
+ # assert "0xFEFFu8".get_numext == "u8"
+ # assert "0b01001u8".get_numext == "u8"
+ # assert "0o872u8".get_numext == "u8"
+ # assert "98".get_numext == ""
+	private fun get_numext: Text do
+		var len = length
+		# Only the last three characters (or fewer on short texts) are inspected
+		var window = 3
+		if len < window then window = len
+		var back = 1
+		while back <= window do
+			var c = self[len - back]
+			# The extension starts at the 'i' or 'u' closest to the end
+			if c == 'i' or c == 'u' then return substring_from(len - back)
+			back += 1
+		end
+		return ""
+	end
+
+ # Returns `self` as the corresponding integer
#
# assert "123".to_i == 123
# assert "-1".to_i == -1
+ # assert "0x64".to_i == 100
+ # assert "0b1100_0011".to_i== 195
+ # assert "--12".to_i == 12
+ #
+ # REQUIRE: `self`.`is_int`
fun to_i: Int
do
- # Shortcut
- return to_s.to_cstring.atoi
+ # Precondition: aborts unless `is_int` holds
+ assert self.is_int
+ # Underscores are digit separators and are dropped before parsing
+ var s = remove_all('_')
+ var val = 0
+ var neg = false
+ var pos = 0
+ # Every leading '-' toggles the sign, so "--12" is positive
+ while s[pos] == '-' do
+ neg = not neg
+ pos += 1
+ end
+ s = s.substring_from(pos)
+ # With two or more characters, the second one selects the base
+ if s.length >= 2 then
+ var s1 = s[1]
+ if s1 == 'x' or s1 == 'X' then
+ val = s.substring_from(2).to_hex
+ else if s1 == 'o' or s1 == 'O' then
+ val = s.substring_from(2).to_oct
+ else if s1 == 'b' or s1 == 'B' then
+ val = s.substring_from(2).to_bin
+ else if s1.is_numeric then
+ val = s.to_dec
+ end
+ # NOTE: when the second character matches none of the cases above, `val` stays 0
+ else
+ # Fewer than two characters: parsed as decimal
+ val = s.to_dec
+ end
+ return if neg then -val else val
+ end
+
+ # Is `self` a valid integer ?
+ #
+ # assert "0xFE46u8".is_num
+ # assert "0b0100".is_num
+ # assert "0o645".is_num
+ # assert "897u8".is_num
+	fun is_num: Bool do
+		var head = get_numhead
+		# Digits only: base prefix, type extension and separators removed
+		var digits = strip_numhead.strip_numext.remove_all('_')
+		# Without a prefix the text must be plain decimal
+		if head == "" then return digits.is_dec
+		var base = head[1]
+		if base == 'x' or base == 'X' then return digits.is_hex
+		if base == 'o' or base == 'O' then return digits.is_oct
+		if base == 'b' or base == 'B' then return digits.is_bin
+		return digits.is_dec
+	end
+
+ # If `self` is a properly formatted integer, returns the corresponding value
+ #
+ # assert "0xFEu8".to_num == 254u8
+ # assert "0b10_10".to_num != 10u8
+	fun to_num: nullable Numeric do
+		if not is_num then return null
+		var clean = remove_all('_')
+		var suffix = clean.get_numext
+		var body = clean.strip_numext
+		# A bare prefix such as "0x" holds no digit at all
+		if body.strip_numhead == "" then return null
+		var value = body.to_i
+		# Only "u8" and the absence of extension are handled; anything else yields null
+		if suffix == "" then return value
+		if suffix == "u8" then return value.to_b
+		return null
end
# If `self` contains a float, return the corresponding float
# assert "101101".to_bin == 45
fun to_bin: Int
do
	# Base 2 parsing is delegated to the generic converter
	return a_to(2)
end
+ # If `self` contains only digits '0' .. '9', return the corresponding integer.
+ #
+ # assert "108".to_dec == 108
+	fun to_dec: Int
+	do
+		# Base 10 parsing is delegated to the generic converter
+		return a_to(10)
+	end
+
# If `self` contains only digits and letters, return the corresponding integer in a given base
#
# assert "120".a_to(3) == 15
return true
end
+ # Returns `true` if the string contains only Binary digits
+ #
+ # assert "1101100".is_bin == true
+ # assert "1101020".is_bin == false
+	fun is_bin: Bool do
+		for ch in chars do
+			if ch == '0' or ch == '1' then continue
+			# First character that is not a binary digit
+			return false
+		end
+		return true
+	end
+
+ # Returns `true` if the string contains only Octal digits
+ #
+ # assert "213453".is_oct == true
+ # assert "781".is_oct == false
+	fun is_oct: Bool do
+		for ch in chars do
+			if ch >= '0' and ch <= '7' then continue
+			# First character outside '0' .. '7'
+			return false
+		end
+		return true
+	end
+
+ # Returns `true` if the string contains only Decimal digits
+ #
+ # assert "10839".is_dec == true
+ # assert "164F".is_dec == false
+	fun is_dec: Bool do
+		for ch in chars do
+			if ch >= '0' and ch <= '9' then continue
+			# First character outside '0' .. '9'
+			return false
+		end
+		return true
+	end
+
# Are all letters in `self` upper-case ?
#
# assert "HELLO WORLD".is_upper == true
for i in [0..length[ do
var char = chars[i]
- h = h.lshift(5) + h + char.ascii
+ h = (h << 5) + h + char.ascii
end
hash_cache = h
i -= 1
var fmt_end = i
var ciph_len = fmt_end - ciph_st + 1
+
+ var arg_index = substring(ciph_st, ciph_len).to_i - 1
+ if arg_index >= args.length then continue
+
s.push substring(curr_st, fmt_st - curr_st)
- s.push args[substring(ciph_st, ciph_len).to_i - 1].to_s
+ s.push args[arg_index].to_s
curr_st = i + 1
end
i += 1
end
s.push substring(curr_st, length - curr_st)
- return s.to_s
+ return s.plain_to_s
end
# Copies `n` bytes from `self` at `src_offset` into `dest` starting at `dest_offset`
var mypos = src_offset
var itspos = dest_offset
while n > 0 do
- dest[itspos] = self.chars[mypos]
+ dest[itspos] = self.bytes[mypos]
itspos += 1
mypos += 1
n -= 1
# Real items, used as a cache when `to_cstring` is called
private var real_items: nullable NativeString = null
- # Returns a char* starting at position `index_from`
+ # Returns a char* starting at position `first_byte`
#
# WARNING: If you choose to use this service, be careful of the following.
#
redef var length = 0
+ redef var bytelen = 0
+
redef fun output
do
var i = 0
end
# Abstract class for the SequenceRead compatible
-# views on String and Buffer objects
+# views on the chars of any Text
private abstract class StringCharView
super SequenceRead[Char]
redef fun reverse_iterator do return self.reverse_iterator_from(self.length - 1)
end
+# Abstract class for the SequenceRead compatible
+# views on the bytes of any Text
+private abstract class StringByteView
+	super SequenceRead[Byte]
+
+	# Kind of Text this view reads from
+	type SELFTYPE: Text
+
+	# The Text whose bytes are exposed by the view
+	var target: SELFTYPE
+
+	redef fun is_empty do return target.is_empty
+
+	# Number of bytes in the view.
+	#
+	# This is `target.bytelen`, not `target.length`: the latter counts
+	# characters and is smaller whenever the text holds multi-byte characters.
+	redef fun length do return target.bytelen
+
+	redef fun iterator do return self.iterator_from(0)
+
+	# Starts from the last byte of `target`
+	redef fun reverse_iterator do return self.reverse_iterator_from(target.bytelen - 1)
+end
+
# Immutable sequence of characters.
#
# String objects may be created using literals.
redef fun chars: Sequence[Char] is abstract
end
-# View on Buffer objects, extends Sequence
+# View for chars on Buffer objects, extends Sequence
# for mutation operations
private abstract class BufferCharView
super StringCharView
end
+# View for bytes on Buffer objects, extends Sequence
+# for mutation operations
+private abstract class BufferByteView
+ super StringByteView
+
+ # Narrows the viewed text to `Buffer`
+ redef type SELFTYPE: Buffer
+end
+
redef class Object
# User readable representation of `self`.
fun to_s: String
do
	# By default, the readable form is the `inspect` form
	return inspect
end
redef class Byte
# C function to calculate the length of the `NativeString` to receive `self`
- private fun byte_to_s_len: Int is extern "native_byte_length_str"
+	private fun byte_to_s_len: Int `{
+		/* With a NULL buffer of size 0, snprintf only reports the length it would write */
+		int needed = snprintf(NULL, 0, "0x%02x", self);
+		return needed;
+	`}
# C function to convert a Nit Byte to a NativeString (char*)
- private fun native_byte_to_s(nstr: NativeString, strlen: Int) is extern "native_byte_to_s"
+ private fun native_byte_to_s(nstr: NativeString, strlen: Int) `{
+ /* Formats `self` as "0x%02x" into `nstr`, writing at most `strlen` bytes, terminator included */
+ snprintf(nstr, strlen, "0x%02x", self);
+ `}
# Displayable byte in its hexadecimal form (0x..)
#
redef fun to_s do
+ # Length of the formatted text, terminator excluded
var nslen = byte_to_s_len
+ # One extra byte is reserved for the terminator
var ns = new NativeString(nslen + 1)
- ns[nslen] = '\0'
+ ns[nslen] = 0u8
native_byte_to_s(ns, nslen + 1)
return ns.to_s_with_length(nslen)
end
redef class Int
# Wrapper of strerror C function
- private fun strerror_ext: NativeString is extern "strerror"
+	private fun strerror_ext: NativeString `{
+		/* Message associated by the C library with error number `self` */
+		return strerror(self);
+	`}
# Returns a string describing error number
fun strerror: String
do
	# Wrap the native message into a Nit String
	var native = strerror_ext
	return native.to_s
end
end
# C function to calculate the length of the `NativeString` to receive `self`
- private fun int_to_s_len: Int is extern "native_int_length_str"
+	private fun int_to_s_len: Int `{
+		/* With a NULL buffer of size 0, snprintf only reports the length it would write */
+		int needed = snprintf(NULL, 0, "%ld", self);
+		return needed;
+	`}
# C function to convert a Nit Int to a NativeString (char*)
- private fun native_int_to_s(nstr: NativeString, strlen: Int) is extern "native_int_to_s"
+ private fun native_int_to_s(nstr: NativeString, strlen: Int) `{
+ /* Formats `self` as "%ld" into `nstr`, writing at most `strlen` bytes, terminator included */
+ snprintf(nstr, strlen, "%ld", self);
+ `}
# return displayable int in base base and signed
fun to_base(base: Int, signed: Bool): String is abstract
end
redef class Char
+
+ # Length of `self` in a UTF-8 String
+	private fun u8char_len: Int do
+		var code = self.ascii
+		# Each range is the span of code points encoded on 2, 3 and 4 bytes
+		if code >= 0x80 and code <= 0x7FF then return 2
+		if code >= 0x800 and code <= 0xFFFF then return 3
+		if code >= 0x10000 and code <= 0x10FFFF then return 4
+		# Below 0x80, or a value past 0x10FFFF (bad character format)
+		return 1
+	end
+
# assert 'x'.to_s == "x"
- redef fun to_s
- do
- var s = new Buffer.with_cap(1)
- s.chars[0] = self
- return s.to_s
+	redef fun to_s do
+		var nbytes = u8char_len
+		# One extra byte for the terminator written by `u8char_tos`
+		var buf = new NativeString(nbytes + 1)
+		u8char_tos(buf, nbytes)
+		return buf.to_s_with_length(nbytes)
end
+ private fun u8char_tos(r: NativeString, len: Int) `{
+ /* Writes the UTF-8 encoding of `self` on `len` bytes into `r`, */
+ /* followed by a terminator: `r` must hold at least `len + 1` bytes. */
+ r[len] = '\0';
+ /* `len` selects the layout; any value other than 1..4 leaves only the terminator written */
+ switch(len){
+ case 1:
+ /* Single byte: the code point itself */
+ r[0] = self;
+ break;
+ case 2:
+ /* 110xxxxx 10xxxxxx: 5 high bits, then 6 low bits */
+ r[0] = 0xC0 | ((self & 0x7C0) >> 6);
+ r[1] = 0x80 | (self & 0x3F);
+ break;
+ case 3:
+ /* 1110xxxx 10xxxxxx 10xxxxxx: 4 + 6 + 6 bits */
+ r[0] = 0xE0 | ((self & 0xF000) >> 12);
+ r[1] = 0x80 | ((self & 0xFC0) >> 6);
+ r[2] = 0x80 | (self & 0x3F);
+ break;
+ case 4:
+ /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx: 3 + 6 + 6 + 6 bits */
+ r[0] = 0xF0 | ((self & 0x1C0000) >> 18);
+ r[1] = 0x80 | ((self & 0x3F000) >> 12);
+ r[2] = 0x80 | ((self & 0xFC0) >> 6);
+ r[3] = 0x80 | (self & 0x3F);
+ break;
+ }
+ `}
+
# Returns true if the char is a numerical digit
#
# assert '0'.is_numeric
# assert '9'.is_numeric
# assert not 'a'.is_numeric
# assert not '?'.is_numeric
+ #
+ # FIXME: Works on ASCII-range only
fun is_numeric: Bool
do
return self >= '0' and self <= '9'
# assert 'Z'.is_alpha
# assert not '0'.is_alpha
# assert not '?'.is_alpha
+ #
+ # FIXME: Works on ASCII-range only
fun is_alpha: Bool
do
return (self >= 'a' and self <= 'z') or (self >= 'A' and self <= 'Z')
# assert '0'.is_alphanumeric
# assert '9'.is_alphanumeric
# assert not '?'.is_alphanumeric
+ #
+ # FIXME: Works on ASCII-range only
fun is_alphanumeric: Bool
do
return self.is_numeric or self.is_alpha
end
redef class Collection[E]
- # Concatenate elements.
+ # String representation of the content of the collection.
+ #
+ # The standard representation is the list of elements separated with commas.
+ #
+ # ~~~
+ # assert [1,2,3].to_s == "[1,2,3]"
+ # assert [1..3].to_s == "[1,2,3]"
+ # assert (new Array[Int]).to_s == "[]" # empty collection
+ # ~~~
+ #
+ # Subclasses may return a more specific string representation.
redef fun to_s
do
- return plain_to_s
+ # Comma-separated elements wrapped in square brackets
+ return "[" + join(",") + "]"
end
- # Concatenate element without separators
+ # Concatenate elements without separators
+ #
+ # ~~~
+ # assert [1,2,3].plain_to_s == "123"
+ # assert [11..13].plain_to_s == "111213"
+ # assert (new Array[Int]).plain_to_s == "" # empty collection
+ # ~~~
fun plain_to_s: String
do
var s = new Buffer
return s.to_s
end
- # Concatenate and separate each elements with `sep`.
+ # Concatenate the elements, separating each of them with `separator`.
+ #
+ # Only concatenate if `separator == null`.
#
- # assert [1, 2, 3].join(":") == "1:2:3"
- # assert [1..3].join(":") == "1:2:3"
- fun join(sep: Text): String
+ # assert [1, 2, 3].join(":") == "1:2:3"
+ # assert [1..3].join(":") == "1:2:3"
+ # assert [1..3].join == "123"
+ fun join(separator: nullable Text): String
do
if is_empty then return ""
# Concat other items
i.next
while i.is_ok do
- s.append(sep)
+ if separator != null then s.append(separator)
e = i.item
if e != null then s.append(e.to_s)
i.next
end
redef class Map[K,V]
- # Concatenate couple of 'key value'.
- # key and value are separated by `couple_sep`.
- # each couple is separated each couple with `sep`.
+ # Concatenate couples of key value.
+ # Key and value are separated by `couple_sep`.
+ # Couples are separated by `sep`.
#
- # var m = new ArrayMap[Int, String]
+ # var m = new HashMap[Int, String]
# m[1] = "one"
# m[10] = "ten"
# assert m.join("; ", "=") == "1=one; 10=ten"
- fun join(sep: String, couple_sep: String): String is abstract
+ fun join(sep, couple_sep: String): String is abstract
end
redef class Sys