fun substring(from: Int, count: Int): SELFTYPE is abstract
# Iterates on the substrings of self if any
- fun substrings: Iterator[Text] is abstract
+ fun substrings: Iterator[FlatText] is abstract
# Is the current Text empty (== "")
#
end
# Return a null terminated char *
- fun to_cstring: NativeString do return flatten.to_cstring
+ fun to_cstring: NativeString is abstract
# The index of the last occurrence of an element starting from pos (in reverse order).
#
# assert "ff".to_hex == 255
fun to_hex: Int do return a_to(16)
+ # If `self` contains only digits <= '7', return the corresponding integer.
+ #
+ # assert "714".to_oct == 460
+ fun to_oct: Int do return a_to(8)
+
+ # If `self` contains only '0' and '1', return the corresponding integer.
+ #
+ # assert "101101".to_bin == 45
+ fun to_bin: Int do return a_to(2)
+
# If `self` contains only digits and letters, return the corresponding integer in a given base
#
# assert "120".a_to(3) == 15
end
return true
end
-
+
# Removes the whitespaces at the beginning of self
#
# assert " \n\thello \n\t".l_trim == "hello \n\t"
#
- # A whitespace is defined as any character which ascii value is less than or equal to 32
+ # `Char::is_whitespace` determines what is a whitespace.
fun l_trim: SELFTYPE
do
var iter = self.chars.iterator
while iter.is_ok do
- if iter.item.ascii > 32 then break
+ if not iter.item.is_whitespace then break
iter.next
end
if iter.index == length then return self.empty
#
# assert " \n\thello \n\t".r_trim == " \n\thello"
#
- # A whitespace is defined as any character which ascii value is less than or equal to 32
+ # `Char::is_whitespace` determines what is a whitespace.
fun r_trim: SELFTYPE
do
var iter = self.chars.reverse_iterator
while iter.is_ok do
- if iter.item.ascii > 32 then break
+ if not iter.item.is_whitespace then break
iter.next
end
if iter.index < 0 then return self.empty
end
# Trims trailing and preceding white spaces
- # A whitespace is defined as any character which ascii value is less than or equal to 32
#
# assert " Hello World ! ".trim == "Hello World !"
# assert "\na\nb\tc\t".trim == "a\nb\tc"
+ #
+ # `Char::is_whitespace` determines what is a whitespace.
fun trim: SELFTYPE do return (self.l_trim).r_trim
+ # Is `self` non-empty and composed solely of whitespace characters?
+ #
+ # assert " \n\t ".is_whitespace == true
+ # assert " hello ".is_whitespace == false
+ # assert "".is_whitespace == false
+ #
+ # `Char::is_whitespace` determines what is a whitespace.
+ fun is_whitespace: Bool
+ do
+ 	# The empty string is, by definition, not a whitespace string
+ 	if is_empty then return false
+ 	var iter = self.chars.iterator
+ 	while iter.is_ok do
+ 		# A single non-whitespace character settles the answer
+ 		if not iter.item.is_whitespace then return false
+ 		iter.next
+ 	end
+ 	return true
+ end
+
# Returns `self` removed from its last line terminator (if any).
#
# assert "Hello\n".chomp == "Hello"
# assert "\r\n\r\n".chomp == "\r\n"
# assert "\r\n\r".chomp == "\r\n"
#
- # Note: unlike with most IO methods like `IStream::read_line`,
+ # Note: unlike with most IO methods like `Reader::read_line`,
# a single `\r` is considered here to be a line terminator and will be removed.
fun chomp: SELFTYPE
do
# REQUIRE: `left >= 0.0 and left <= 1.0`
# ENSURE: `self.length <= length implies result.length == length`
# ENSURE: `self.length >= length implies result == self`
- fun justify(length: Int, left: Float): SELFTYPE
+ fun justify(length: Int, left: Float): String
do
var diff = length - self.length
- if diff <= 0 then return self
+ if diff <= 0 then return to_s
assert left >= 0.0 and left <= 1.0
var before = (diff.to_f * left).to_i
return " " * before + self + " " * (diff-before)
fun to_cmangle: String
do
if is_empty then return ""
- var res = new FlatBuffer
+ var res = new Buffer
var underscore = false
var start = 0
var c = chars[0]
#
# assert "abAB12<>&".escape_to_c == "abAB12<>&"
# assert "\n\"'\\".escape_to_c == "\\n\\\"\\'\\\\"
+ #
+ # Most non-printable characters (below ASCII 32) are escaped to an octal form `\nnn`.
+ # Three digits are always used to avoid following digits to be interpreted as an element
+ # of the octal sequence.
+ #
+ # assert "{0.ascii}{1.ascii}{8.ascii}{31.ascii}{32.ascii}".escape_to_c == "\\000\\001\\010\\037 "
+ #
+ # The exceptions are the common `\t` and `\n`.
fun escape_to_c: String
do
- var b = new FlatBuffer
+ var b = new Buffer
for i in [0..length[ do
var c = chars[i]
if c == '\n' then
b.append("\\n")
+ else if c == '\t' then
+ b.append("\\t")
else if c == '\0' then
- b.append("\\0")
+ b.append("\\000")
else if c == '"' then
b.append("\\\"")
else if c == '\'' then
else if c == '\\' then
b.append("\\\\")
else if c.ascii < 32 then
- b.append("\\{c.ascii.to_base(8, false)}")
+ b.add('\\')
+ var oct = c.ascii.to_base(8, false)
+ # Force 3 octal digits since it is the
+ # maximum allowed in the C specification
+ if oct.length == 1 then
+ b.add('0')
+ b.add('0')
+ else if oct.length == 2 then
+ b.add('0')
+ end
+ b.append(oct)
else
b.add(c)
end
# assert "ab|\{\}".escape_more_to_c("|\{\}") == "ab\\|\\\{\\\}"
fun escape_more_to_c(chars: String): String
do
- var b = new FlatBuffer
+ var b = new Buffer
for c in escape_to_c.chars do
if chars.chars.has(c) then
b.add('\\')
#
# assert "\n\"'\\\{\}0".escape_to_sh == "'\n\"'\\''\\\{\}0'"
fun escape_to_sh: String do
- var b = new FlatBuffer
+ var b = new Buffer
b.chars.add '\''
for i in [0..length[ do
var c = chars[i]
# These characters are `;`, `|`, `\`, and the non-printable ones.
# They will be rendered as `"?{hex}"`.
fun escape_to_mk: String do
- var b = new FlatBuffer
+ var b = new Buffer
for i in [0..length[ do
var c = chars[i]
if c == '$' then
# assert u.chars[0].ascii == 10 # (the ASCII value of the "new line" character)
fun unescape_nit: String
do
- var res = new FlatBuffer.with_capacity(self.length)
+ var res = new Buffer.with_cap(self.length)
var was_slash = false
for i in [0..length[ do
var c = chars[i]
# assert ".com/post?e=asdf&f=123".to_percent_encoding == ".com%2fpost%3fe%3dasdf%26f%3d123"
fun to_percent_encoding: String
do
- var buf = new FlatBuffer
+ var buf = new Buffer
for i in [0..length[ do
var c = chars[i]
# assert "invalid % usage".from_percent_encoding == "invalid ? usage"
fun from_percent_encoding: String
do
- var buf = new FlatBuffer
+ var buf = new Buffer
var i = 0
while i < length do
# assert "a&b-<>\"x\"/'".html_escape == "a&b-<>"x"/'"
#
# SEE: <https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet#RULE_.231_-_HTML_Escape_Before_Inserting_Untrusted_Data_into_HTML_Element_Content>
- fun html_escape: SELFTYPE
+ fun html_escape: String
do
- var buf = new FlatBuffer
+ var buf = new Buffer
for i in [0..length[ do
var c = chars[i]
return escape_more_to_c("|\{\}<>")
end
- # Flat representation of self
- fun flatten: FlatText is abstract
-
private var hash_cache: nullable Int = null
redef fun hash
return hash_cache.as(not null)
end
+ # Gives the formatted string back as a Nit string with `args` in place
+ #
+ # Each `%N`, where `N` is a 1-based decimal index, is replaced by `args[N-1].to_s`.
+ #
+ # assert "This %1 is a %2.".format("String", "formatted String") == "This String is a formatted String."
+ #
+ # The character that follows a backslash is skipped, so an escaped `%` is not substituted
+ # (the backslash itself is kept in the result).
+ #
+ # assert "\\%1 This string".format("String") == "\\%1 This string"
+ #
+ # A `%` that is not followed by digits, or whose index does not designate
+ # an element of `args`, is kept verbatim.
+ #
+ # assert "100%".format("x") == "100%"
+ # assert "50% of %1".format("x") == "50% of x"
+ # assert "%2 and %0".format("only one") == "%2 and %0"
+ fun format(args: Object...): String do
+ 	# Pieces of the result, concatenated at the end
+ 	var s = new Array[Text]
+ 	# Start of the literal text not yet pushed into `s`
+ 	var curr_st = 0
+ 	var i = 0
+ 	while i < length do
+ 		# Skip escaped characters
+ 		if self[i] == '\\' then
+ 			i += 1
+ 		# In case of format
+ 		else if self[i] == '%' then
+ 			var fmt_st = i
+ 			i += 1
+ 			var ciph_st = i
+ 			while i < length and self[i].is_numeric do
+ 				i += 1
+ 			end
+ 			# Step back on the last digit (or on the `%` itself if there is no digit)
+ 			i -= 1
+ 			var ciph_len = i - ciph_st + 1
+ 			# Substitute only when digits were read and they designate an existing
+ 			# argument; otherwise `curr_st` is left alone so the text is emitted as is.
+ 			if ciph_len > 0 then
+ 				var arg_index = substring(ciph_st, ciph_len).to_i - 1
+ 				if arg_index >= 0 and arg_index < args.length then
+ 					s.push substring(curr_st, fmt_st - curr_st)
+ 					s.push args[arg_index].to_s
+ 					curr_st = i + 1
+ 				end
+ 			end
+ 		end
+ 		i += 1
+ 	end
+ 	# Remaining literal text after the last substitution
+ 	s.push substring(curr_st, length - curr_st)
+ 	return s.to_s
+ end
+
+ # Copies `n` bytes of `self`, read from `src_offset` onwards, into `dest` from `dest_offset` onwards
+ #
+ # High-level counterpart of `NativeString::copy_to`.
+ #
+ # REQUIRE: `dest` must have room for `n` bytes starting at `dest_offset`
+ #
+ # var ns = new NativeString(8)
+ # "Text is String".copy_to_native(ns, 8, 2, 0)
+ # assert ns.to_s_with_length(8) == "xt is St"
+ #
+ fun copy_to_native(dest: NativeString, n, src_offset, dest_offset: Int) do
+ 	# Number of items transferred so far; nothing is copied when `n <= 0`
+ 	var done = 0
+ 	while done < n do
+ 		dest[dest_offset + done] = self.chars[src_offset + done]
+ 		done += 1
+ 	end
+ end
+
end
# All kinds of array-based text representations.
# Real items, used as a cache when `to_cstring` is called
private var real_items: nullable NativeString = null
- redef var length: Int = 0
+ # Returns a char* starting at position `index_from`
+ #
+ # WARNING: If you choose to use this service, be careful of the following.
+ #
+ # Strings and NativeString are *ideally* always allocated through a Garbage Collector.
+ # Since the GC tracks the use of the pointer for the beginning of the char*, it may be
+ # deallocated at any moment, rendering the pointer returned by this function invalid.
+ # Any access to freed memory may very likely cause undefined behaviour or a crash.
+ # (Failure to do so will most certainly result in long and painful debugging hours)
+ #
+ # The only safe use of this pointer is if it is ephemeral (e.g. read in a C function
+ # then immediately return).
+ #
+ # As always, do not modify the content of the String in C code: Nit Strings are immutable.
+ # If you need to modify it, work on a local copy of the char*.
+ private fun fast_cstring: NativeString is abstract
+
+ redef var length = 0
redef fun output
do
end
end
- redef fun flatten do return self
+ redef fun copy_to_native(dest, n, src_offset, dest_offset) do
+ items.copy_to(dest, n, src_offset, dest_offset)
+ end
end
# Abstract class for the SequenceRead compatible
abstract class String
super Text
- redef type SELFTYPE: String
+ redef type SELFTYPE: String is fixed
redef fun to_s do return self
# assert "helloworld".insert_at(" ", 5) == "hello world"
fun insert_at(s: String, pos: Int): SELFTYPE is abstract
- redef fun substrings: Iterator[String] is abstract
+ redef fun substrings is abstract
# Returns a reversed version of self
#
# assert "Hello World!".to_lower == "hello world!"
fun to_lower : SELFTYPE is abstract
- # Takes a camel case `self` and converts it to snake case
+ # Takes a camel case `self` and converts it to snake case
#
# assert "randomMethodId".to_snake_case == "random_method_id"
#
- # If `self` is upper, it is returned unchanged
+ # The rules are the following:
#
- # assert "RANDOM_METHOD_ID".to_snake_case == "RANDOM_METHOD_ID"
+ # An uppercase is always converted to a lowercase
#
- # If the identifier is prefixed by an underscore, the underscore is ignored
+ # assert "HELLO_WORLD".to_snake_case == "hello_world"
+ #
+ # An uppercase that follows a lowercase is prefixed with an underscore
+ #
+ # assert "HelloTheWORLD".to_snake_case == "hello_the_world"
+ #
+ # An uppercase that follows an uppercase and is followed by a lowercase, is prefixed with an underscore
#
- # assert "_privateField".to_snake_case == "_private_field"
+ # assert "HelloTHEWorld".to_snake_case == "hello_the_world"
+ #
+ # All other characters are kept as is; `self` does not need to be a proper CamelCased string.
+ #
+ # assert "=-_H3ll0Th3W0rld_-=".to_snake_case == "=-_h3ll0th3w0rld_-="
fun to_snake_case: SELFTYPE
do
- if self.is_upper then return self
+ if self.is_lower then return self
- var new_str = new FlatBuffer.with_capacity(self.length)
- var is_first_char = true
+ var new_str = new Buffer.with_cap(self.length)
+ var prev_is_lower = false
+ var prev_is_upper = false
for i in [0..length[ do
var char = chars[i]
- if is_first_char then
- new_str.add(char.to_lower)
- is_first_char = false
+ if char.is_lower then
+ new_str.add(char)
+ prev_is_lower = true
+ prev_is_upper = false
else if char.is_upper then
- new_str.add('_')
+ if prev_is_lower then
+ new_str.add('_')
+ else if prev_is_upper and i+1 < length and chars[i+1].is_lower then
+ new_str.add('_')
+ end
new_str.add(char.to_lower)
+ prev_is_lower = false
+ prev_is_upper = true
else
new_str.add(char)
+ prev_is_lower = false
+ prev_is_upper = false
end
end
-
+
return new_str.to_s
end
- # Takes a snake case `self` and converts it to camel case
+ # Takes a snake case `self` and converts it to camel case
#
# assert "random_method_id".to_camel_case == "randomMethodId"
#
do
if self.is_upper then return self
- var new_str = new FlatBuffer
+ var new_str = new Buffer
var is_first_char = true
var follows_us = false
fun capitalized: SELFTYPE do
if length == 0 then return self
- var buf = new FlatBuffer.with_capacity(length)
+ var buf = new Buffer.with_cap(length)
var curr = chars[0].to_upper
var prev = curr
# Index in _items of the last item of the string
private var index_to: Int is noinit
- redef var chars: SequenceRead[Char] = new FlatStringCharView(self)
+ redef var chars = new FlatStringCharView(self) is lazy
redef fun [](index)
do
return native.to_s_with_length(self.length)
end
+ redef fun fast_cstring do return items.fast_cstring(index_from)
+
redef fun substring(from, count)
do
assert count >= 0
# String Specific Methods #
##################################################
- private init with_infos(items: NativeString, len: Int, from: Int, to: Int)
+ # Low-level creation of a new string with given data.
+ #
+ # `items` will be used as is, without copy, to retrieve the characters of the string.
+ # Aliasing issues are the responsibility of the caller.
+ private init with_infos(items: NativeString, length: Int, from: Int, to: Int)
do
self.items = items
- length = len
+ self.length = length
index_from = from
index_to = to
end
- redef fun to_cstring: NativeString
- do
+ redef fun to_cstring do
if real_items != null then
return real_items.as(not null)
else
abstract class Buffer
super Text
- redef type SELFTYPE: Buffer
+ # New `Buffer` factory, will return a concrete `Buffer` type with default capacity
+ new do return new FlatBuffer
+
+ # New `Buffer` factory, returns a concrete `Buffer` with a capacity of `i`
+ new with_cap(i: Int) do return new FlatBuffer.with_capacity(i)
+
+ redef type SELFTYPE: Buffer is fixed
# Specific implementations MUST set this to `true` in order to invalidate caches
protected var is_dirty = true
# Clears the buffer
#
- # var b = new FlatBuffer
+ # var b = new Buffer
# b.append "hello"
# assert not b.is_empty
# b.clear
# Adds the content of text `s` at the end of self
#
- # var b = new FlatBuffer
+ # var b = new Buffer
# b.append "hello"
# b.append "world"
# assert b == "helloworld"
# `self` is appended in such a way that `self` is repeated `r` times
#
- # var b = new FlatBuffer
+ # var b = new Buffer
# b.append "hello"
# b.times 3
# assert b == "hellohellohello"
# Reverses itself in-place
#
- # var b = new FlatBuffer
+ # var b = new Buffer
# b.append("hello")
# b.reverse
# assert b == "olleh"
# Changes each lower-case char in `self` by its upper-case variant
#
- # var b = new FlatBuffer
+ # var b = new Buffer
# b.append("Hello World!")
# b.upper
# assert b == "HELLO WORLD!"
# Changes each upper-case char in `self` by its lower-case variant
#
- # var b = new FlatBuffer
+ # var b = new Buffer
# b.append("Hello World!")
# b.lower
# assert b == "hello world!"
super FlatText
super Buffer
- redef type SELFTYPE: FlatBuffer
-
- redef var chars: Sequence[Char] = new FlatBufferCharView(self)
+ redef var chars: Sequence[Char] = new FlatBufferCharView(self) is lazy
private var capacity: Int = 0
+ redef fun fast_cstring do return items.fast_cstring(0)
+
redef fun substrings do return new FlatSubstringsIter(self)
# Re-copies the `NativeString` into a new one and sets it as the new `Buffer`
length = 0
end
- redef fun empty do return new FlatBuffer
+ redef fun empty do return new Buffer
redef fun enlarge(cap)
do
capacity = c
end
- redef fun to_s: String
- do
+ redef fun to_s do
written = true
if length == 0 then items = new NativeString(1)
return new FlatString.with_infos(items, length, 0, length - 1)
# Create a new empty string.
init do end
+ # Low-level creation of a new buffer with given data.
+ #
+ # `items` will be used as is, without copy, to store the characters of the buffer.
+ # Aliasing issues are the responsibility of the caller.
+ #
+ # If `items` is shared, `written` should be set to true after the creation
+ # so that a modification will do a copy-on-write.
+ private init with_infos(items: NativeString, capacity, length: Int)
+ do
+ self.items = items
+ self.length = length
+ self.capacity = capacity
+ end
+
# Create a new string copied from `s`.
init from(s: Text)
do
init with_capacity(cap: Int)
do
assert cap >= 0
- # _items = new NativeString.calloc(cap)
items = new NativeString(cap+1)
capacity = cap
length = 0
if from < 0 then from = 0
if count > length then count = length
if from < count then
- var r = new FlatBuffer.with_capacity(count - from)
- while from < count do
- r.chars.push(items[from])
- from += 1
- end
+ var len = count - from
+ var r_items = new NativeString(len)
+ items.copy_to(r_items, len, from, 0)
+ var r = new FlatBuffer.with_infos(r_items, len, len)
return r
else
- return new FlatBuffer
+ return new Buffer
end
end
end
end
+redef class Byte
+ # Extern C routine giving the length of the `NativeString` that will receive `self`
+ private fun byte_to_s_len: Int is extern "native_byte_length_str"
+
+ # Extern C routine converting a Nit `Byte` to a `NativeString` (char*), written into `nstr`
+ private fun native_byte_to_s(nstr: NativeString, strlen: Int) is extern "native_byte_to_s"
+
+ # Displayable byte in its hexadecimal form (0x..)
+ #
+ # assert 1.to_b.to_s == "0x01"
+ # assert (-123).to_b.to_s == "0x85"
+ redef fun to_s do
+ 	var len = byte_to_s_len
+ 	# One extra slot for the terminating '\0'
+ 	var size = len + 1
+ 	var buf = new NativeString(size)
+ 	buf[len] = '\0'
+ 	native_byte_to_s(buf, size)
+ 	return buf.to_s_with_length(len)
+ end
+end
+
redef class Int
# Wrapper of strerror C function
- private fun strerror_ext: NativeString is extern `{
- return strerror(recv);
- `}
+ private fun strerror_ext: NativeString is extern "strerror"
# Returns a string describing error number
fun strerror: String do return strerror_ext.to_s
# assert 1.to_s == "1"
# assert (-123).to_s == "-123"
redef fun to_s do
+ # Fast case for common numbers
+ if self == 0 then return "0"
+ if self == 1 then return "1"
+
var nslen = int_to_s_len
var ns = new NativeString(nslen + 1)
ns[nslen] = '\0'
return p1 + "." + p2
end
-
- # `self` representation with `nb` digits after the '.'.
- #
- # assert 12.345.to_precision_native(1) == "12.3"
- # assert 12.345.to_precision_native(2) == "12.35"
- # assert 12.345.to_precision_native(3) == "12.345"
- # assert 12.345.to_precision_native(4) == "12.3450"
- fun to_precision_native(nb: Int): String import NativeString.to_s `{
- int size;
- char *str;
-
- size = snprintf(NULL, 0, "%.*f", (int)nb, recv);
- str = malloc(size + 1);
- sprintf(str, "%.*f", (int)nb, recv );
-
- return NativeString_to_s( str );
- `}
end
redef class Char
# assert 'x'.to_s == "x"
redef fun to_s
do
- var s = new FlatBuffer.with_capacity(1)
+ var s = new Buffer.with_cap(1)
s.chars[0] = self
return s.to_s
end
# Concatenate elements.
redef fun to_s
do
- var s = new FlatBuffer
+ return plain_to_s
+ end
+
+ # Concatenate element without separators
+ fun plain_to_s: String
+ do
+ var s = new Buffer
for e in self do if e != null then s.append(e.to_s)
return s.to_s
end
do
if is_empty then return ""
- var s = new FlatBuffer # Result
+ var s = new Buffer # Result
# Concat first item
var i = iterator
redef class Array[E]
# Fast implementation
- redef fun to_s
+ redef fun plain_to_s
do
var l = length
if l == 0 then return ""
end
end
+redef class NativeArray[E]
+ # Concatenate all the elements, which must be strings, into a single `String`
+ #
+ # REQUIRE: `self isa NativeArray[String]`
+ # REQUIRE: all elements are initialized
+ fun native_to_s: String
+ do
+ 	assert self isa NativeArray[String]
+ 	var count = length
+ 	# Alias of `self` taken after the `isa` check above
+ 	var strs = self
+
+ 	# First pass: total length of the result
+ 	var total = 0
+ 	var idx = 0
+ 	while idx < count do
+ 		total += strs[idx].length
+ 		idx += 1
+ 	end
+
+ 	# One extra slot for the terminating '\0'
+ 	var dest = new NativeString(total + 1)
+ 	dest[total] = '\0'
+
+ 	# Second pass: copy each element right after the previous one
+ 	var pos = 0
+ 	idx = 0
+ 	while idx < count do
+ 		var str = strs[idx]
+ 		var str_len = str.length
+ 		if str isa FlatString then
+ 			str.items.copy_to(dest, str_len, str.index_from, pos)
+ 			pos += str_len
+ 		else
+ 			# Other strings are copied one flat substring at a time
+ 			for sub in str.substrings do
+ 				var flat = sub.as(FlatString)
+ 				var flat_len = flat.length
+ 				flat.items.copy_to(dest, flat_len, flat.index_from, pos)
+ 				pos += flat_len
+ 			end
+ 		end
+ 		idx += 1
+ 	end
+ 	return dest.to_s_with_length(total)
+ end
+end
+
redef class Map[K,V]
# Concatenate couple of 'key value'.
# key and value are separated by `couple_sep`.
do
if is_empty then return ""
- var s = new FlatBuffer # Result
+ var s = new Buffer # Result
# Concat first item
var i = iterator
# Creates a new NativeString with a capacity of `length`
new(length: Int) is intern
+ # Returns a char* starting at `index`.
+ #
+ # WARNING: Unsafe for extern code, use only for temporary
+ # pointer manipulation purposes (e.g. write to file or such)
+ fun fast_cstring(index: Int): NativeString is intern
+
# Get char at `index`.
fun [](index: Int): Char is intern
end
redef class Sys
- private var args_cache: nullable Sequence[String]
+ private var args_cache: nullable Sequence[String] = null
# The arguments of the program as given by the OS
fun program_args: Sequence[String]