# High-level abstraction for all text representations
abstract class Text
super Comparable
- super StringCapable
redef type OTHER: Text
#
# assert "abcd".has_substring("bc",1) == true
# assert "abcd".has_substring("bc",2) == false
+ #
+ # Returns true iff all characters of `str` are present
+ # at the expected index in `self`.
+ # The first character of `str` being at `pos`, the second
+ # character being at `pos+1` and so on...
+ #
+ # This means that all characters of `str` need to be inside `self`.
+ #
+ # assert "abcd".has_substring("xab", -1) == false
+ # assert "abcd".has_substring("cdx", 2) == false
+ #
+ # And that the empty string is always a valid substring.
+ #
+ # assert "abcd".has_substring("", 2) == true
+ # assert "abcd".has_substring("", 200) == true
fun has_substring(str: String, pos: Int): Bool
do
+ if str.is_empty then return true
+ if pos < 0 or pos + str.length > length then return false
var myiter = self.chars.iterator_from(pos)
var itsiter = str.chars.iterator
while myiter.is_ok and itsiter.is_ok do
# assert "\na\nb\tc\t".trim == "a\nb\tc"
fun trim: SELFTYPE do return (self.l_trim).r_trim
- # Mangle a string to be a unique string only made of alphanumeric characters
+ # Returns `self` with its last line terminator (if any) removed.
+ #
+ # assert "Hello\n".chomp == "Hello"
+ # assert "Hello".chomp == "Hello"
+ #
+ # assert "\n".chomp == ""
+ # assert "".chomp == ""
+ #
+ # Line terminators are `"\n"`, `"\r\n"` and `"\r"`.
+ # A single line terminator, the last one, is removed.
+ #
+ # assert "\r\n".chomp == ""
+ # assert "\r\n\n".chomp == "\r\n"
+ # assert "\r\n\r\n".chomp == "\r\n"
+ # assert "\r\n\r".chomp == "\r\n"
+ #
+ # Note: unlike with most IO methods like `IStream::read_line`,
+ # a single `\r` is considered here to be a line terminator and will be removed.
+ fun chomp: SELFTYPE
+ do
+ 	var size = length
+ 	# An empty text has no terminator to drop.
+ 	if size == 0 then return self
+ 	var tail = self.chars.last
+ 	# Neither `\r` nor `\n` at the end: nothing to remove.
+ 	if tail != '\r' and tail != '\n' then return self
+ 	# A trailing `\n` preceded by `\r` is one two-character terminator.
+ 	if tail == '\n' and size > 1 and self.chars[size-2] == '\r' then
+ 		return substring(0, size-2)
+ 	end
+ 	# Lone `\r` or lone `\n`: drop exactly one character.
+ 	return substring(0, size-1)
+ end
+
+ # Justify `self` in a space of `length` characters
+ #
+ # `left` is the space ratio on the left side.
+ # * 0.0 for left-justified (no space at the left)
+ # * 1.0 for right-justified (all spaces at the left)
+ # * 0.5 for centered (half the spaces at the left)
+ #
+ # Examples
+ #
+ # assert "hello".justify(10, 0.0) == "hello "
+ # assert "hello".justify(10, 1.0) == " hello"
+ # assert "hello".justify(10, 0.5) == " hello "
+ #
+ # If `length` is not enough, `self` is returned as is.
+ #
+ # assert "hello".justify(2, 0.0) == "hello"
+ #
+ # REQUIRE: `left >= 0.0 and left <= 1.0`
+ # ENSURE: `self.length <= length implies result.length == length`
+ # ENSURE: `self.length >= length implies result == self`
+ fun justify(length: Int, left: Float): SELFTYPE
+ do
+ 	# Number of padding spaces needed to reach the requested width.
+ 	var padding = length - self.length
+ 	# `self` already fills (or overflows) the space: return it untouched.
+ 	if padding <= 0 then return self
+ 	assert left >= 0.0 and left <= 1.0
+ 	# Split the padding according to the `left` ratio; the remainder goes right.
+ 	var on_left = (padding.to_f * left).to_i
+ 	var on_right = padding - on_left
+ 	return " " * on_left + self + " " * on_right
+ end
+
+ # Mangle a string to be a unique string only made of alphanumeric characters and underscores.
+ #
+ # This method is injective (two different inputs never produce the same
+ # output) and the returned string always respects the following rules:
+ #
+ # * Contains only US-ASCII letters, digits and underscores.
+ # * Never starts with a digit.
+ # * Never ends with an underscore.
+ # * Never contains two contiguous underscores.
+ #
+ # assert "42_is/The answer!".to_cmangle == "_52d2_is_47dThe_32danswer_33d"
+ # assert "__".to_cmangle == "_95d_95d"
+ # assert "__d".to_cmangle == "_95d_d"
+ # assert "_d_".to_cmangle == "_d_95d"
+ # assert "_42".to_cmangle == "_95d42"
+ # assert "foo".to_cmangle == "foo"
+ # assert "".to_cmangle == ""
fun to_cmangle: String
do
+ if is_empty then return ""
var res = new FlatBuffer
var underscore = false
- for i in [0..length[ do
- var c = chars[i]
+ var start = 0
+ var c = chars[0]
+
+ if c >= '0' and c <= '9' then
+ res.add('_')
+ res.append(c.ascii.to_s)
+ res.add('d')
+ start = 1
+ end
+ for i in [start..length[ do
+ c = chars[i]
if (c >= 'a' and c <= 'z') or (c >='A' and c <= 'Z') then
res.add(c)
underscore = false
underscore = false
end
end
+ if underscore then
+ res.append('_'.ascii.to_s)
+ res.add('d')
+ end
return res.to_s
end
# assert "\n\"'\\\{\}".escape_to_nit == "\\n\\\"\\'\\\\\\\{\\\}"
fun escape_to_nit: String do return escape_more_to_c("\{\}")
+ # Escape to POSIX Shell (sh).
+ #
+ # Abort if the text contains a null byte.
+ #
+ # assert "\n\"'\\\{\}0".escape_to_sh == "'\n\"'\\''\\\{\}0'"
+ fun escape_to_sh: String do
+ 	var out = new FlatBuffer
+ 	# Open the single-quoted shell word.
+ 	out.chars.add '\''
+ 	for pos in [0..length[ do
+ 		var ch = chars[pos]
+ 		if ch != '\'' then
+ 			# A null byte cannot be represented: abort.
+ 			assert without_null_byte: ch != '\0'
+ 			out.add(ch)
+ 		else
+ 			# Close the quote, emit an escaped quote, then reopen.
+ 			out.append("'\\''")
+ 		end
+ 	end
+ 	# Close the single-quoted shell word.
+ 	out.chars.add '\''
+ 	return out.to_s
+ end
+
+ # Escape to include in a Makefile
+ #
+ # Unfortunately, some characters are not escapable in Makefile.
+ # These characters are `;`, `|`, `\`, `=`, and the control characters
+ # (code below 32). They will be rendered as `"?{hex}"`.
+ fun escape_to_mk: String do
+ 	var out = new FlatBuffer
+ 	for pos in [0..length[ do
+ 		var ch = chars[pos]
+ 		# Characters that only need a backslash in front of them.
+ 		var needs_backslash = ch == ':' or ch == ' ' or ch == '#'
+ 		# Characters with no escape: rendered as `?` followed by their hex code.
+ 		var unescapable = ch.ascii < 32 or ch == ';' or ch == '|' or ch == '\\' or ch == '='
+ 		if ch == '$' then
+ 			# A dollar sign is escaped by doubling it.
+ 			out.append("$$")
+ 		else if needs_backslash then
+ 			out.add('\\')
+ 			out.add(ch)
+ 		else if unescapable then
+ 			out.append("?{ch.ascii.to_base(16, false)}")
+ 		else
+ 			out.add(ch)
+ 		end
+ 	end
+ 	return out.to_s
+ end
+
# Return a string where Nit escape sequences are transformed.
#
# var s = "\\n"
return buf.to_s
end
- # Escape the four characters `<`, `>`, `&`, and `"` with their html counterpart
+ # Escape the characters `<`, `>`, `&`, `"`, `'` and `/` as HTML/XML entity references.
#
- # assert "a&b->\"x\"".html_escape == "a&b->"x""
+ # assert "a&b-<>\"x\"/'".html_escape == "a&b-<>"x"/'"
+ #
+ # SEE: <https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet#RULE_.231_-_HTML_Escape_Before_Inserting_Untrusted_Data_into_HTML_Element_Content>
fun html_escape: SELFTYPE
do
var buf = new FlatBuffer
else if c == '>' then
buf.append ">"
else if c == '"' then
- buf.append """
+ buf.append """
+ else if c == '\'' then
+ buf.append "'"
+ else if c == '/' then
+ buf.append "/"
else buf.add c
end
end
end
+ # Escape string used in labels for graphviz
+ #
+ # assert ">><<".escape_to_dot == "\\>\\>\\<\\<"
+ # Delegates to `escape_more_to_c`, additionally escaping `|`, `\{`, `\}`, `<` and `>`.
+ fun escape_to_dot: String do return escape_more_to_c("|\{\}<>")
+
# Flat representation of self
fun flatten: FlatText is abstract
abstract class FlatText
super Text
- private var items: NativeString
+ # Underlying C-String (`char*`)
+ #
+ # Warning : Might be void in some subclasses, be sure to check
+ # if set before using it.
+ private var items: NativeString is noinit
# Real items, used as a cache when `to_cstring` is called
private var real_items: nullable NativeString = null
redef var length: Int = 0
- init do end
-
redef fun output
do
var i = 0
type SELFTYPE: Text
- private var target: SELFTYPE
-
- private init(tgt: SELFTYPE)
- do
- target = tgt
- end
+ var target: SELFTYPE
redef fun is_empty do return target.is_empty
end
+# A `String` holds and manipulates an arbitrary sequence of characters.
+#
+# String objects may be created using literals.
+#
+# assert "Hello World!" isa String
abstract class String
super Text
# assert "abc" * 0 == ""
fun *(i: Int): SELFTYPE is abstract
+ # Insert `s` at `pos`.
+ #
+ # assert "helloworld".insert_at(" ", 5) == "hello world"
fun insert_at(s: String, pos: Int): SELFTYPE is abstract
+ redef fun substrings: Iterator[String] is abstract
+
# Returns a reversed version of self
#
# assert "hello".reversed == "olleh"
return new_str.to_s
end
+
+ # Returns a capitalized `self`
+ #
+ # Letters that follow a letter are lowercased
+ # Letters that follow a non-letter are upcased.
+ #
+ # SEE : `Char::is_letter` for the definition of letter.
+ #
+ # assert "jAVASCRIPT".capitalized == "Javascript"
+ # assert "i am root".capitalized == "I Am Root"
+ # assert "ab_c -ab0c ab\nc".capitalized == "Ab_C -Ab0C Ab\nC"
+ fun capitalized: SELFTYPE do
+ 	if length == 0 then return self
+
+ 	var out = new FlatBuffer.with_capacity(length)
+
+ 	# The first character is always upcased.
+ 	var last = chars[0].to_upper
+ 	out[0] = last
+
+ 	for pos in [1 .. length[ do
+ 		var ch = self[pos]
+ 		# `last` is the character just before `ch` in `self`.
+ 		if last.is_letter then
+ 			out[pos] = ch.to_lower
+ 		else
+ 			out[pos] = ch.to_upper
+ 		end
+ 		last = ch
+ 	end
+
+ 	return out.to_s
+ end
end
private class FlatSubstringsIter
var tgt: nullable FlatText
- init(tgt: FlatText) do self.tgt = tgt
-
redef fun item do
assert is_ok
return tgt.as(not null)
super String
# Index in _items of the start of the string
- private var index_from: Int
+ private var index_from: Int is noinit
# Index in _items of the last item of the string
- private var index_to: Int
+ private var index_to: Int is noinit
redef var chars: SequenceRead[Char] = new FlatStringCharView(self)
redef fun reversed
do
- var native = calloc_string(self.length + 1)
+ var native = new NativeString(self.length + 1)
var length = self.length
var items = self.items
var pos = 0
redef fun to_upper
do
- var outstr = calloc_string(self.length + 1)
+ var outstr = new NativeString(self.length + 1)
var out_index = 0
var myitems = self.items
redef fun to_lower
do
- var outstr = calloc_string(self.length + 1)
+ var outstr = new NativeString(self.length + 1)
var out_index = 0
var myitems = self.items
redef fun to_cstring: NativeString
do
- if real_items != null then return real_items.as(not null)
- if index_from > 0 or index_to != items.cstring_length - 1 then
- var newItems = calloc_string(length + 1)
+ if real_items != null then
+ return real_items.as(not null)
+ else
+ var newItems = new NativeString(length + 1)
self.items.copy_to(newItems, length, index_from, 0)
newItems[length] = '\0'
self.real_items = newItems
return newItems
end
- return items
end
redef fun ==(other)
var total_length = my_length + its_length
- var target_string = calloc_string(my_length + its_length + 1)
+ var target_string = new NativeString(my_length + its_length + 1)
self.items.copy_to(target_string, my_length, index_from, 0)
if s isa FlatString then
var my_items = self.items
- var target_string = calloc_string((final_length) + 1)
+ var target_string = new NativeString(final_length + 1)
target_string[final_length] = '\0'
end
+# A mutable sequence of characters.
abstract class Buffer
super Text
# Specific implementations MUST set this to `true` in order to invalidate caches
protected var is_dirty = true
+ # Copy-On-Write flag
+ #
+ # If the `Buffer` was to_s'd, the next in-place altering
+ # operation will cause the current `Buffer` to be re-allocated.
+ #
+ # The flag will then be set at `false`.
+ protected var written = false
+
# Modifies the char contained at pos `index`
#
# DEPRECATED : Use self.chars.[]= instead
# assert b == "hello world!"
fun lower is abstract
+ # Capitalizes each word in `self`
+ #
+ # Letters that follow a letter are lowercased
+ # Letters that follow a non-letter are upcased.
+ #
+ # SEE: `Char::is_letter` for the definition of a letter.
+ #
+ # var b = new FlatBuffer.from("jAVAsCriPt")
+ # b.capitalize
+ # assert b == "Javascript"
+ # b = new FlatBuffer.from("i am root")
+ # b.capitalize
+ # assert b == "I Am Root"
+ # b = new FlatBuffer.from("ab_c -ab0c ab\nc")
+ # b.capitalize
+ # assert b == "Ab_C -Ab0C Ab\nC"
+ fun capitalize do
+ 	if length == 0 then return
+ 	# The first character is always upcased.
+ 	var last = self[0].to_upper
+ 	self[0] = last
+ 	for pos in [1 .. length[ do
+ 		# Read the original character before it is overwritten in place.
+ 		var ch = self[pos]
+ 		if last.is_letter then
+ 			self[pos] = ch.to_lower
+ 		else
+ 			self[pos] = ch.to_upper
+ 		end
+ 		last = ch
+ 	end
+ end
+
redef fun hash
do
if is_dirty then hash_cache = null
redef fun substrings do return new FlatSubstringsIter(self)
+ # Re-copies the `NativeString` into a new one and sets it as the new `Buffer`
+ #
+ # This happens when an operation modifies the current `Buffer` and
+ # the Copy-On-Write flag `written` is set at true.
+ private fun reset do
+ 	# Allocate a new native string of `capacity` chars.
+ 	var fresh = new NativeString(capacity)
+ 	# Copy the first `length` chars of the current storage into it.
+ 	items.copy_to(fresh, length, 0, 0)
+ 	# Switch to the new storage and clear the `written` flag.
+ 	items = fresh
+ 	written = false
+ end
+
redef fun [](index)
do
assert index >= 0
add(item)
return
end
+ if written then reset
assert index >= 0 and index < length
items[index] = item
end
redef fun clear do
is_dirty = true
+ if written then reset
length = 0
end
var c = capacity
if cap <= c then return
while c <= cap do c = c * 2 + 2
- var a = calloc_string(c+1)
+ # The COW flag can be set at false here, since
+ # it does a copy of the current `Buffer`
+ written = false
+ var a = new NativeString(c+1)
if length > 0 then items.copy_to(a, length, 0, 0)
items = a
capacity = c
redef fun to_s: String
do
- return to_cstring.to_s_with_length(length)
+ written = true
+ if length == 0 then items = new NativeString(1)
+ return new FlatString.with_infos(items, length, 0, length - 1)
end
redef fun to_cstring
do
if is_dirty then
- var new_native = calloc_string(length + 1)
+ var new_native = new NativeString(length + 1)
new_native[length] = '\0'
if length > 0 then items.copy_to(new_native, length, 0, 0)
real_items = new_native
# Create a new empty string.
init do end
+ # Create a new string copied from `s`.
init from(s: Text)
do
capacity = s.length + 1
length = s.length
- items = calloc_string(capacity)
+ items = new NativeString(capacity)
if s isa FlatString then
s.items.copy_to(items, length, s.index_from, 0)
else if s isa FlatBuffer then
do
assert cap >= 0
# _items = new NativeString.calloc(cap)
- items = calloc_string(cap+1)
+ items = new NativeString(cap+1)
capacity = cap
length = 0
end
redef fun reverse
do
- var ns = calloc_string(capacity)
+ written = false
+ var ns = new NativeString(capacity)
var si = length - 1
var ni = 0
var it = items
redef fun upper
do
+ if written then reset
var it = items
var id = length - 1
while id >= 0 do
redef fun lower
do
+ if written then reset
var it = items
var id = length - 1
while id >= 0 do
private class FlatBufferCharView
super BufferCharView
- super StringCapable
redef type SELFTYPE: FlatBuffer
redef fun append(s)
do
- var my_items = target.items
var s_length = s.length
if target.capacity < s.length then enlarge(s_length + target.length)
end
# The class name of the object.
#
- # assert 5.class_name == "Int"
+ # assert 5.class_name == "Int"
fun class_name: String do return native_class_name.to_s
# Developer readable representation of `self`.
end
redef class Float
- # Pretty print self, print needoed decimals up to a max of 3.
+ # Pretty representation of `self`, with decimals as needed from 1 to a maximum of 3
#
- # assert 12.34.to_s == "12.34"
- # assert (-0120.03450).to_s == "-120.035"
+ # assert 12.34.to_s == "12.34"
+ # assert (-0120.030).to_s == "-120.03"
#
- # see `to_precision` for a different precision.
+ # see `to_precision` for a custom precision.
redef fun to_s do
var str = to_precision( 3 )
if is_inf != 0 or is_nan then return str
return str
end
- # `self` representation with `nb` digits after the '.'.
+ # `String` representation of `self` with the given number of `decimals`
#
- # assert 12.345.to_precision(1) == "12.3"
- # assert 12.345.to_precision(2) == "12.35"
- # assert 12.345.to_precision(3) == "12.345"
- # assert 12.345.to_precision(4) == "12.3450"
- fun to_precision(nb: Int): String
+ # assert 12.345.to_precision(0) == "12"
+ # assert 12.345.to_precision(3) == "12.345"
+ # assert (-12.345).to_precision(3) == "-12.345"
+ # assert (-0.123).to_precision(3) == "-0.123"
+ # assert 0.999.to_precision(2) == "1.00"
+ # assert 0.999.to_precision(4) == "0.9990"
+ fun to_precision(decimals: Int): String
do
if is_nan then return "nan"
return "-inf"
end
- if nb == 0 then return self.to_i.to_s
+ if decimals == 0 then return self.to_i.to_s
var f = self
- for i in [0..nb[ do f = f * 10.0
+ for i in [0..decimals[ do f = f * 10.0
if self > 0.0 then
f = f + 0.5
else
f = f - 0.5
end
var i = f.to_i
- if i == 0 then return "0.0"
- var s = i.to_s
+ if i == 0 then return "0." + "0"*decimals
+
+ # Prepare both parts of the float, before and after the "."
+ var s = i.abs.to_s
var sl = s.length
- if sl > nb then
- var p1 = s.substring(0, s.length-nb)
- var p2 = s.substring(s.length-nb, nb)
- return p1 + "." + p2
+ var p1
+ var p2
+ if sl > decimals then
+ # Has something before the "."
+ p1 = s.substring(0, sl-decimals)
+ p2 = s.substring(sl-decimals, decimals)
else
- return "0." + ("0"*(nb-sl)) + s
+ p1 = "0"
+ p2 = "0"*(decimals-sl) + s
end
+
+ if i < 0 then p1 = "-" + p1
+
+ return p1 + "." + p2
end
# `self` representation with `nb` digits after the '.'.
# Returns true if the char is a numerical digit
#
- # assert '0'.is_numeric
- # assert '9'.is_numeric
- # assert not 'a'.is_numeric
- # assert not '?'.is_numeric
+ # assert '0'.is_numeric
+ # assert '9'.is_numeric
+ # assert not 'a'.is_numeric
+ # assert not '?'.is_numeric
fun is_numeric: Bool
do
return self >= '0' and self <= '9'
# Returns true if the char is an alphabetic letter (`a`-`z` or `A`-`Z`)
#
- # assert 'a'.is_alpha
- # assert 'Z'.is_alpha
- # assert not '0'.is_alpha
- # assert not '?'.is_alpha
+ # assert 'a'.is_alpha
+ # assert 'Z'.is_alpha
+ # assert not '0'.is_alpha
+ # assert not '?'.is_alpha
fun is_alpha: Bool
do
return (self >= 'a' and self <= 'z') or (self >= 'A' and self <= 'Z')
# Returns true if the char is an alpha or a numeric digit
#
- # assert 'a'.is_alphanumeric
- # assert 'Z'.is_alphanumeric
- # assert '0'.is_alphanumeric
- # assert '9'.is_alphanumeric
- # assert not '?'.is_alphanumeric
+ # assert 'a'.is_alphanumeric
+ # assert 'Z'.is_alphanumeric
+ # assert '0'.is_alphanumeric
+ # assert '9'.is_alphanumeric
+ # assert not '?'.is_alphanumeric
fun is_alphanumeric: Bool
do
return self.is_numeric or self.is_alpha
# Native strings are simple C char *
extern class NativeString `{ char* `}
- super StringCapable
# Creates a new NativeString with a capacity of `length`
new(length: Int) is intern
+
+ # Get char at `index`.
fun [](index: Int): Char is intern
+
+ # Set char `item` at index.
fun []=(index: Int, item: Char) is intern
+
+ # Copy `self` to `dest`.
fun copy_to(dest: NativeString, length: Int, from: Int, to: Int) is intern
# Position of the first nul character.
while self[l] != '\0' do l += 1
return l
end
+
+ # Parse `self` as an Int.
fun atoi: Int is intern
+
+ # Parse `self` as a Float.
fun atof: Float is extern "atof"
redef fun to_s
return to_s_with_length(cstring_length)
end
+ # Returns `self` as a String of `length`.
fun to_s_with_length(length: Int): FlatString
do
assert length >= 0
- return new FlatString.with_infos(self, length, 0, length - 1)
+ var str = new FlatString.with_infos(self, length, 0, length - 1)
+ return str
end
+ # Returns `self` as a new String.
fun to_s_with_copy: FlatString
do
var length = cstring_length
- var new_self = calloc_string(length + 1)
+ var new_self = new NativeString(length + 1)
copy_to(new_self, length, 0, 0)
- return new FlatString.with_infos(new_self, length, 0, length - 1)
+ var str = new FlatString.with_infos(new_self, length, 0, length - 1)
+ new_self[length] = '\0'
+ str.real_items = new_self
+ return str
end
end
-# StringCapable objects can create native strings
-interface StringCapable
- protected fun calloc_string(size: Int): NativeString is intern
-end
-
redef class Sys
private var args_cache: nullable Sequence[String]
#
# Note: if caching is not useful, see `alpha_comparator`
class CachedAlphaComparator
- super Comparator[Object]
+ super Comparator
+ redef type COMPARED: Object
private var cache = new HashMap[Object, String]
# see `alpha_comparator`
private class AlphaComparator
- super Comparator[Object]
+ super Comparator
redef fun compare(a, b) do return a.to_s <=> b.to_s
end
# var a = [1, 2, 3, 10, 20]
# alpha_comparator.sort(a)
# assert a == [1, 10, 2, 20, 3]
-fun alpha_comparator: Comparator[Object] do return once new AlphaComparator
+fun alpha_comparator: Comparator do return once new AlphaComparator
# The arguments of the program as given by the OS
fun args: Sequence[String]