# assert "abcd".has_suffix("bcd") == true
fun has_suffix(suffix: String): Bool do return has_substring(suffix, length - suffix.length)
- # Returns a copy of `self` minus all occurences of `c`
- #
- # assert "__init__".remove_all('_') == "init"
- fun remove_all(c: Char): String do
- var b = new Buffer
- for i in chars do if i != c then b.add i
- return b.to_s
- end
-
# Returns `self` as the corresponding integer
#
# assert "123".to_i == 123
if c >= '0' and c <= '9' then
res.add('_')
- res.append(c.ascii.to_s)
+ res.append(c.code_point.to_s)
res.add('d')
start = 1
end
continue
end
if underscore then
- res.append('_'.ascii.to_s)
+ res.append('_'.code_point.to_s)
res.add('d')
end
if c >= '0' and c <= '9' then
underscore = true
else
res.add('_')
- res.append(c.ascii.to_s)
+ res.append(c.code_point.to_s)
res.add('d')
underscore = false
end
end
if underscore then
- res.append('_'.ascii.to_s)
+ res.append('_'.code_point.to_s)
res.add('d')
end
return res.to_s
# Three digits are always used to avoid following digits to be interpreted as an element
# of the octal sequence.
#
- # assert "{0.ascii}{1.ascii}{8.ascii}{31.ascii}{32.ascii}".escape_to_c == "\\000\\001\\010\\037 "
+ # assert "{0.code_point}{1.code_point}{8.code_point}{31.code_point}{32.code_point}".escape_to_c == "\\000\\001\\010\\037 "
#
# The exceptions are the common `\t` and `\n`.
fun escape_to_c: String
b.append("\\n")
else if c == '\t' then
b.append("\\t")
- else if c == '\0' then
- b.append("\\000")
else if c == '"' then
b.append("\\\"")
else if c == '\'' then
b.append("\\\'")
else if c == '\\' then
b.append("\\\\")
- else if c.ascii < 32 then
+ else if c.code_point < 32 then
b.add('\\')
- var oct = c.ascii.to_base(8, false)
+ var oct = c.code_point.to_base(8, false)
# Force 3 octal digits since it is the
# maximum allowed in the C specification
if oct.length == 1 then
else if c == ':' or c == ' ' or c == '#' then
b.add('\\')
b.add(c)
- else if c.ascii < 32 or c == ';' or c == '|' or c == '\\' or c == '=' then
- b.append("?{c.ascii.to_base(16, false)}")
+ else if c.code_point < 32 or c == ';' or c == '|' or c == '\\' or c == '=' then
+ b.append("?{c.code_point.to_base(16, false)}")
else
b.add(c)
end
# assert s.length == 2
# var u = s.unescape_nit
# assert u.length == 1
- # assert u.chars[0].ascii == 10 # (the ASCII value of the "new line" character)
+ # assert u.chars[0].code_point == 10 # (the ASCII value of the "new line" character)
fun unescape_nit: String
do
var res = new Buffer.with_cap(self.length)
# assert "aBc09-._~".to_percent_encoding == "aBc09-._~"
# assert "%()< >".to_percent_encoding == "%25%28%29%3c%20%3e"
# assert ".com/post?e=asdf&f=123".to_percent_encoding == ".com%2fpost%3fe%3dasdf%26f%3d123"
+ # assert "éあいう".to_percent_encoding == "%c3%a9%e3%81%82%e3%81%84%e3%81%86"
fun to_percent_encoding: String
do
var buf = new Buffer
c == '_' or c == '~'
then
buf.add c
- else buf.append "%{c.ascii.to_hex}"
+ else
+ var bytes = c.to_s.bytes
+ for b in bytes do buf.append "%{b.to_i.to_hex}"
+ end
end
return buf.to_s
# assert "%25%28%29%3C%20%3E".from_percent_encoding == "%()< >"
# assert "incomplete %".from_percent_encoding == "incomplete ?"
# assert "invalid % usage".from_percent_encoding == "invalid ? usage"
+ # assert "%c3%a9%e3%81%82%e3%81%84%e3%81%86".from_percent_encoding == "éあいう"
fun from_percent_encoding: String
do
- var buf = new Buffer
+ # Upper bound on the decoded size: every iteration of the decoding loop
+ # below consumes at least one character and writes exactly one byte, so
+ # the output never needs more than `bytelen` bytes.
+ #
+ # Do NOT subtract 2 per `%` here: a `%` that is cut off or not followed
+ # by two hex digits is replaced by a single `?` and the characters after
+ # it are still copied, so the output can be as long as the input
+ # (e.g. "invalid % usage"). Under-allocating would overflow `buf`.
+ var len = bytelen
+ var has_percent = false
+ for c in chars do
+ if c == '%' then
+ has_percent = true
+ break
+ end
+ end
+
+ # If no transformation is needed, return self as a string
+ if not has_percent then return to_s
+ var buf = new NativeString(len)
var i = 0
+ # Number of bytes written to `buf` so far
+ var l = 0
while i < length do
var c = chars[i]
if c == '%' then
if i + 2 >= length then
# What follows % has been cut off
- buf.add '?'
+ buf[l] = '?'.ascii
else
i += 1
var hex_s = substring(i, 2)
if hex_s.is_hex then
var hex_i = hex_s.to_hex
- buf.add hex_i.ascii
+ buf[l] = hex_i.to_b
i += 1
else
# What follows a % is not Hex
- buf.add '?'
+ buf[l] = '?'.ascii
i -= 1
end
end
- else buf.add c
+ # NOTE(review): only one byte is stored per unescaped character; a
+ # literal non-ASCII character presumably loses its extra UTF-8 bytes
+ # here - confirm against `Char::ascii`.
+ else buf[l] = c.ascii
i += 1
+ l += 1
end
- return buf.to_s
+ return buf.to_s_with_length(l)
end
# Escape the characters `<`, `>`, `&`, `"`, `'` and `/` as HTML/XML entity references.
for i in [0..length[ do
var char = chars[i]
- h = (h << 5) + h + char.ascii
+ h = (h << 5) + h + char.code_point
end
hash_cache = h
redef fun is_empty do return target.is_empty
- redef fun length do return target.length
+ redef fun length do return target.bytelen
redef fun iterator do return self.iterator_from(0)
redef class Char
+ # The UTF-8 encoding of `self`, as a read-only sequence of bytes
+ #
+ # assert 'a'.bytes == [0x61u8]
+ # assert 'ま'.bytes == [0xE3u8, 0x81u8, 0xBEu8]
+ fun bytes: SequenceRead[Byte]
+ do
+ # Delegate to the single-character String built from `self`
+ return to_s.bytes
+ end
+
# Length of `self` in a UTF-8 String
private fun u8char_len: Int do
- var c = self.ascii
+ var c = self.code_point
if c < 0x80 then return 1
if c <= 0x7FF then return 2
if c <= 0xFFFF then return 3
# Returns `self` as a String of `length`.
fun to_s_with_length(length: Int): String is abstract
+
+ # Returns `self` as a String with `bytelen` and `length` set
+ #
+ # SEE: `abstract_text::Text` for more information on the difference
+ # between `Text::bytelen` and `Text::length`
+ fun to_s_full(bytelen, unilen: Int): String is abstract
end
redef class NativeArray[E]