#
# assert " \n\thello \n\t".l_trim == "hello \n\t"
#
- # A whitespace is defined as any character which ascii value is less than or equal to 32
+ # `Char::is_whitespace` determines what is a whitespace.
fun l_trim: SELFTYPE
do
var iter = self.chars.iterator
while iter.is_ok do
- if iter.item.ascii > 32 then break
+ if not iter.item.is_whitespace then break
iter.next
end
if iter.index == length then return self.empty
#
# assert " \n\thello \n\t".r_trim == " \n\thello"
#
- # A whitespace is defined as any character which ascii value is less than or equal to 32
+ # `Char::is_whitespace` determines what is a whitespace.
fun r_trim: SELFTYPE
do
var iter = self.chars.reverse_iterator
while iter.is_ok do
- if iter.item.ascii > 32 then break
+ if not iter.item.is_whitespace then break
iter.next
end
if iter.index < 0 then return self.empty
end
# Trims trailing and preceding white spaces
- # A whitespace is defined as any character which ascii value is less than or equal to 32
#
# assert " Hello World ! ".trim == "Hello World !"
# assert "\na\nb\tc\t".trim == "a\nb\tc"
+ #
+ # `Char::is_whitespace` determines what is a whitespace.
fun trim: SELFTYPE do return (self.l_trim).r_trim
+ # Is the string non-empty but only made of whitespaces?
+ #
+ # assert " \n\t ".is_whitespace == true
+ # assert " hello ".is_whitespace == false
+ # assert "".is_whitespace == false
+ #
+ # `Char::is_whitespace` determines what is a whitespace.
+ fun is_whitespace: Bool
+ do
+ if is_empty then return false
+ for c in self.chars do
+ if not c.is_whitespace then return false
+ end
+ return true
+ end
+
# Returns `self` removed from its last line terminator (if any).
#
# assert "Hello\n".chomp == "Hello"
#
# assert "abAB12<>&".escape_to_c == "abAB12<>&"
# assert "\n\"'\\".escape_to_c == "\\n\\\"\\'\\\\"
+ #
+ # Most non-printable characters (bellow ASCII 32) are escaped to an octal form `\nnn`.
+ # Three digits are always used to avoid following digits to be interpreted as an element
+ # of the octal sequence.
+ #
+ # assert "{0.ascii}{1.ascii}{8.ascii}{31.ascii}{32.ascii}".escape_to_c == "\\000\\001\\010\\037 "
+ #
+ # The exceptions are the common `\t` and `\n`.
fun escape_to_c: String
do
var b = new FlatBuffer
var c = chars[i]
if c == '\n' then
b.append("\\n")
+ else if c == '\t' then
+ b.append("\\t")
else if c == '\0' then
- b.append("\\0")
+ b.append("\\000")
else if c == '"' then
b.append("\\\"")
else if c == '\'' then
else if c == '\\' then
b.append("\\\\")
else if c.ascii < 32 then
- b.append("\\{c.ascii.to_base(8, false)}")
+ b.add('\\')
+ var oct = c.ascii.to_base(8, false)
+ # Force 3 octal digits since it is the
+ # maximum allowed in the C specification
+ if oct.length == 1 then
+ b.add('0')
+ b.add('0')
+ else if oct.length == 2 then
+ b.add('0')
+ end
+ b.append(oct)
else
b.add(c)
end
# Real items, used as cache for to_cstring is called
private var real_items: nullable NativeString = null
+ # Returns a char* starting at position `index_from`
+ #
+ # WARNING: If you choose to use this service, be careful of the following.
+ #
+ # Strings and NativeString are *ideally* always allocated through a Garbage Collector.
+ # Since the GC tracks the use of the pointer for the beginning of the char*, it may be
+ # deallocated at any moment, rendering the pointer returned by this function invalid.
+ # Any access to freed memory may very likely cause undefined behaviour or a crash.
+ # (Failure to do so will most certainly result in long and painful debugging hours)
+ #
+ # The only safe use of this pointer is if it is ephemeral (e.g. read in a C function
+ # then immediately return).
+ #
+ # As always, do not modify the content of the String in C code, if this is what you want
+ # copy locally the char* as Nit Strings are immutable.
+ private fun fast_cstring: NativeString is abstract
+
redef var length: Int = 0
redef fun output
#
# assert "randomMethodId".to_snake_case == "random_method_id"
#
- # If `self` is upper, it is returned unchanged
+ # The rules are the following:
#
- # assert "RANDOM_METHOD_ID".to_snake_case == "RANDOM_METHOD_ID"
+ # An uppercase is always converted to a lowercase
#
- # If the identifier is prefixed by an underscore, the underscore is ignored
+ # assert "HELLO_WORLD".to_snake_case == "hello_world"
#
- # assert "_privateField".to_snake_case == "_private_field"
+ # An uppercase that follows a lowercase is prefixed with an underscore
+ #
+ # assert "HelloTheWORLD".to_snake_case == "hello_the_world"
+ #
+ # An uppercase that follows an uppercase and is followed by a lowercase, is prefixed with an underscore
+ #
+ # assert "HelloTHEWorld".to_snake_case == "hello_the_world"
+ #
+ # All other characters are kept as is; `self` does not need to be a proper CamelCased string.
+ #
+ # assert "=-_H3ll0Th3W0rld_-=".to_snake_case == "=-_h3ll0th3w0rld_-="
fun to_snake_case: SELFTYPE
do
- if self.is_upper then return self
+ if self.is_lower then return self
var new_str = new FlatBuffer.with_capacity(self.length)
- var is_first_char = true
+ var prev_is_lower = false
+ var prev_is_upper = false
for i in [0..length[ do
var char = chars[i]
- if is_first_char then
- new_str.add(char.to_lower)
- is_first_char = false
+ if char.is_lower then
+ new_str.add(char)
+ prev_is_lower = true
+ prev_is_upper = false
else if char.is_upper then
- new_str.add('_')
+ if prev_is_lower then
+ new_str.add('_')
+ else if prev_is_upper and i+1 < length and chars[i+1].is_lower then
+ new_str.add('_')
+ end
new_str.add(char.to_lower)
+ prev_is_lower = false
+ prev_is_upper = true
else
new_str.add(char)
+ prev_is_lower = false
+ prev_is_upper = false
end
end
# Indes in _items of the last item of the string
private var index_to: Int is noinit
- redef var chars: SequenceRead[Char] = new FlatStringCharView(self)
+ redef var chars: SequenceRead[Char] = new FlatStringCharView(self) is lazy
redef fun [](index)
do
return native.to_s_with_length(self.length)
end
+ redef fun fast_cstring do return items.fast_cstring(index_from)
+
redef fun substring(from, count)
do
assert count >= 0
super FlatText
super Buffer
- redef var chars: Sequence[Char] = new FlatBufferCharView(self)
+ redef var chars: Sequence[Char] = new FlatBufferCharView(self) is lazy
private var capacity: Int = 0
+ redef fun fast_cstring do return items.fast_cstring(0)
+
redef fun substrings do return new FlatSubstringsIter(self)
# Re-copies the `NativeString` into a new one and sets it as the new `Buffer`
# Create a new empty string.
init do end
+ # Low-level creation a new buffer with given data.
+ #
+ # `items` will be used as is, without copy, to store the characters of the buffer.
+ # Aliasing issues is the responsibility of the caller.
+ #
+ # If `items` is shared, `written` should be set to true after the creation
+ # so that a modification will do a copy-on-write.
+ private init with_infos(items: NativeString, capacity, length: Int)
+ do
+ self.items = items
+ self.length = length
+ self.capacity = capacity
+ end
+
# Create a new string copied from `s`.
init from(s: Text)
do
init with_capacity(cap: Int)
do
assert cap >= 0
- # _items = new NativeString.calloc(cap)
items = new NativeString(cap+1)
capacity = cap
length = 0
if from < 0 then from = 0
if count > length then count = length
if from < count then
- var r = new FlatBuffer.with_capacity(count - from)
- while from < count do
- r.chars.push(items[from])
- from += 1
- end
+ var len = count - from
+ var r_items = new NativeString(len)
+ items.copy_to(r_items, len, from, 0)
+ var r = new FlatBuffer.with_infos(r_items, len, len)
return r
else
return new FlatBuffer
redef class Int
# Wrapper of strerror C function
- private fun strerror_ext: NativeString is extern `{
- return strerror(recv);
- `}
+ private fun strerror_ext: NativeString is extern "strerror"
# Returns a string describing error number
fun strerror: String do return strerror_ext.to_s
# assert 1.to_s == "1"
# assert (-123).to_s == "-123"
redef fun to_s do
+ # Fast case for common numbers
+ if self == 0 then return "0"
+ if self == 1 then return "1"
+
var nslen = int_to_s_len
var ns = new NativeString(nslen + 1)
ns[nslen] = '\0'
end
end
+redef class NativeArray[E]
+ # Join all the elements using `to_s`
+ #
+ # REQUIRE: `self isa NativeArray[String]`
+ # REQUIRE: all elements are initialized
+ fun native_to_s: String
+ do
+ assert self isa NativeArray[String]
+ var l = length
+ var na = self
+ var i = 0
+ var sl = 0
+ var mypos = 0
+ while i < l do
+ sl += na[i].length
+ i += 1
+ mypos += 1
+ end
+ var ns = new NativeString(sl + 1)
+ ns[sl] = '\0'
+ i = 0
+ var off = 0
+ while i < mypos do
+ var tmp = na[i]
+ var tpl = tmp.length
+ if tmp isa FlatString then
+ tmp.items.copy_to(ns, tpl, tmp.index_from, off)
+ off += tpl
+ else
+ for j in tmp.substrings do
+ var s = j.as(FlatString)
+ var slen = s.length
+ s.items.copy_to(ns, slen, s.index_from, off)
+ off += slen
+ end
+ end
+ i += 1
+ end
+ return ns.to_s_with_length(sl)
+ end
+end
+
redef class Map[K,V]
# Concatenate couple of 'key value'.
# key and value are separated by `couple_sep`.
# Creates a new NativeString with a capacity of `length`
new(length: Int) is intern
+ # Returns a char* starting at `index`.
+ #
+ # WARNING: Unsafe for extern code, use only for temporary
+ # pointer manipulation purposes (e.g. write to file or such)
+ fun fast_cstring(index: Int): NativeString is intern
+
# Get char at `index`.
fun [](index: Int): Char is intern