core :: FlatText :: byte_length=
core :: FlatText :: char_to_byte_index
Index of the characterindex
in _items
core :: FlatText :: chars_to_escape_to_c
By escapingself
to C, how many more bytes will be needed ?
core :: FlatText :: chars_to_html_escape
By escapingself
to HTML, how many more bytes will be needed ?
core :: FlatText :: defaultinit
core :: FlatText :: fast_cstring
Returns a char* starting at positionfirst_byte
core :: bytes $ FlatText :: append_to_bytes
Appendsself.bytes
to b
text_stat :: text_stat $ FlatText :: char_to_byte_index
Index of the characterindex
in _items
base64 :: base64 $ FlatText :: check_base64
Isself
a well-formed Base64 entity ?
core :: flat $ FlatText :: copy_to_native
Copiesn
bytes from self
at src_offset
into dest
starting at dest_offset
core $ FlatText :: copy_to_native
Copiesn
bytes from self
at src_offset
into dest
starting at dest_offset
base64 :: base64 $ FlatText :: decode_base64
Decodes the receiver string to base64 using a custom padding character.base64 :: base64 $ FlatText :: encode_base64
Encodes the receiver string to base64 using a custom padding character.core :: flat $ FlatText :: escape_to_c
Escape"
\
'
, trigraphs and non printable characters using the rules of literal C strings and characters
core :: flat $ FlatText :: html_escape
Escape the characters<
, >
, &
, "
, '
and /
as HTML/XML entity references.
serialization :: Serializable :: accept_json_serializer
Refinable service to customize the serialization of this class to JSONserialization :: Serializable :: accept_msgpack_attribute_counter
Hook to customize the behavior of theAttributeCounter
serialization :: Serializable :: accept_msgpack_serializer
Hook to customize the serialization of this class to MessagePackserialization :: Serializable :: add_to_bundle
Called by[]=
to dynamically choose the appropriate method according
core :: Text :: binarydigest_to_bytes
Return aBytes
by reading 0 and 1.
core :: FlatText :: byte_length=
core :: Text :: capitalized
Returns a capitalizedself
core :: FlatText :: char_to_byte_index
Index of the characterindex
in _items
core :: FlatText :: chars_to_escape_to_c
By escapingself
to C, how many more bytes will be needed ?
core :: FlatText :: chars_to_html_escape
By escapingself
to HTML, how many more bytes will be needed ?
core :: Object :: class_factory
Implementation used byget_class
to create the specific class.
core :: Text :: copy_to_native
Copiesn
bytes from self
at src_offset
into dest
starting at dest_offset
serialization :: Serializable :: core_serialize_to
Actual serialization ofself
to serializer
core :: Text :: decode_base64
Decodes the receiver string to base64 using a custom padding character.core :: Comparable :: defaultinit
core :: Text :: defaultinit
mpi :: Sendable :: defaultinit
core :: Object :: defaultinit
core :: FlatText :: defaultinit
core :: Writable :: defaultinit
core :: Cloneable :: defaultinit
core :: Pattern :: defaultinit
core :: Text :: deserialize_json
Deserialize anullable Object
from this JSON formatted string
core :: Text :: encode_base64
Encodes the receiver string to base64 using a custom padding character.core :: Text :: english_scoring
Scoreself
according to english's letter frequency.
core :: Text :: escape_more_to_c
Escape additionnal characterscore :: Text :: escape_to_c
Escape"
\
'
, trigraphs and non printable characters using the rules of literal C strings and characters
core :: Text :: escape_to_js
Escape the content ofself
to pass to JavaScript code
core :: Text :: escape_to_utf16
Returnsself
with all characters escaped with their UTF-16 representation
core :: FlatText :: fast_cstring
Returns a char* starting at positionfirst_byte
core :: Text :: file_extension
Return right-most extension (without the dot)core :: Text :: file_lstat
The status of a file or of a symlink. see POSIX lstat(2).serialization :: Serializable :: from_deserializer
Create an instance of this class from thedeserializer
core :: Text :: from_percent_encoding
Decodeself
from percent (or URL) encoding to a clear string
core :: Text :: from_utf16_digit
Returns a UTF-16 escape valuecore :: Text :: from_utf16_escape
Returns the Unicode char escaped byself
core :: Text :: group_exists
Does the operating system know the group namedself
?
core :: Text :: has_substring
Does self have a substringstr
starting from position pos
?
core :: Text :: hexdigest_to_bytes
Returns a newBytes
instance with the digest as content
core :: Text :: html_escape
Escape the characters<
, >
, &
, "
, '
and /
as HTML/XML entity references.
core :: Text :: http_download
Download the file at URLself
to output_path
with a simple HTTP request
core :: Text :: index_of_from
Gets the index of the first occurence of ´c´ starting from ´pos´core :: Text :: is_numeric
Is this string in a valid numeric format compatible withto_f
?
core :: Object :: is_same_instance
Return true ifself
and other
are the same instance (i.e. same identity).
core :: Object :: is_same_serialized
Isself
the same as other
in a serialization context?
core :: Object :: is_same_type
Return true ifself
and other
have the same dynamic type.
core :: Text :: is_whitespace
Is the string non-empty but only made of whitespaces?core :: Text :: last_index_of_from
The index of the last occurrence of an element starting from pos (in reverse order).core :: Text :: levenshtein_distance
Return the Levenshtein distance between two stringscore :: Text :: light_gray
Make the text appear in light gray (or white) in a ANSI/VT100 terminal.serialization :: Serializable :: msgpack_extra_array_items
Hook to request a larger than usual metadata arraycore :: Object :: output_class_name
Display class name on stdout (debug only).core :: Text :: parse_bmfont
Parseself
as an XML BMFont description file
core :: Text :: remove_all
Returns a copy ofself
minus all occurences of pattern
core :: Text :: replace_first
Replace the first occurrence ofpattern
with string
core :: Text :: search_all
Search all occurrences ofpattern
into self.
core :: Pattern :: search_all_in
Search allself
occurrences into s
.
core :: Text :: search_from
Search the first occurence ofpattern
after from
.
core :: Pattern :: search_index_in
Searchself
into s
from a certain position.
core :: Text :: search_last
Search the last occurence of the textt
.
core :: Text :: search_last_up_to
Search the last occurence of the textt
before up_to
.
serialization :: Serializable :: serialize_msgpack
Serializeself
to MessagePack bytes
serialization :: Serializable :: serialize_to
Serializeself
to serializer
serialization :: Serializable :: serialize_to_json
Serializeself
to JSON
core :: Text :: simplify_path
Simplify a file path by remove useless.
, removing //
, and resolving ..
core :: Text :: split_once_on
Splitself
on the first occurence of pattern
core :: Text :: split_with
@deprecated alias forsplit
core :: Text :: strip_extension
Remove the trailingextension
.
core :: Text :: strip_nullable
Strip thenullable
prefix from the type name self
core :: Text :: strip_nullable_and_params
Strip thenullable
prefix and the params from the type name self
core :: Text :: substring_from
Create a substring fromself
beginning at the from
position
core :: Text :: to_camel_case
Takes a snake caseself
and converts it to camel case
core :: Text :: to_cmangle
Mangle a string to be a unique string only made of alphanumeric characters and underscores.core :: Text :: to_percent_encoding
Encodeself
to percent (or URL) encoding
serialization :: Serializable :: to_pretty_json
Serializeself
to plain pretty JSON
core :: Text :: to_program_name
Convert the path (self
) to a program name.
core :: Text :: to_snake_case
Takes a camel caseself
and converts it to snake case
core :: Text :: to_sql_date_string
Format the date represented byself
into an escaped string for SQLite
core :: Text :: unescape_json
Removes JSON-escaping if necessary in a JSON stringcore :: Text :: unescape_nit
Return a string where Nit escape sequences are transformed.core :: Text :: unescape_to_bytes
Return aBytes
instance where Nit escape sequences are transformed.
core :: Text :: user_exists
Does the operating system know the user namedself
?
core :: Writable :: write_to_bytes
Likewrite_to
but return a new Bytes (may be quite large)
core :: Writable :: write_to_file
Likewrite_to
but take care of creating the file
core :: Writable :: write_to_string
Likewrite_to
but return a new String (may be quite large).
serialization :: DirectSerializable
Instances of this class are not delayed and instead serialized immediatelySerializer::serialize
# All kinds of array-based text representations.
abstract class FlatText
super Text
# Underlying CString (`char*`)
#
# Warning: Might be void in some subclasses, be sure to check
# if set before using it.
var items: CString is noinit
# Returns a char* starting at position `first_byte`
#
# WARNING: If you choose to use this service, be careful of the following.
#
# Strings and CString are *ideally* always allocated through a Garbage Collector.
# Since the GC tracks the use of the pointer for the beginning of the char*, it may be
# deallocated at any moment, rendering the pointer returned by this function invalid.
# Any access to freed memory may very likely cause undefined behaviour or a crash.
# (Failure to do so will most certainly result in long and painful debugging hours)
#
# The only safe use of this pointer is if it is ephemeral (e.g. read in a C function
# then immediately return).
#
# As always, do not modify the content of the String in C code, if this is what you want
# copy locally the char* as Nit Strings are immutable.
fun fast_cstring: CString is abstract
redef var length = 0
redef var byte_length = 0
redef fun output
do
var i = 0
while i < length do
items[i].output
i += 1
end
end
redef fun copy_to_native(dest, n, src_offset, dest_offset) do
items.copy_to(dest, n, src_offset, dest_offset)
end
end
lib/core/text/abstract_text.nit:1400,1--1443,3
redef class FlatText
# First byte of the CString
protected fun first_byte: Int do return 0
# Last byte of the CString
protected fun last_byte: Int do return first_byte + _byte_length - 1
# Cache of the latest position (char) explored in the string
var position: Int = 0
# Cached position (bytes) in the CString underlying the String
var bytepos: Int = 0
# Index of the character `index` in `_items`
fun char_to_byte_index(index: Int): Int do
var dpos = index - _position
var b = _bytepos
var its = _items
if dpos == 1 then
if its[b] & 0x80 == 0x00 then
b += 1
else
b += its.length_of_char_at(b)
end
_bytepos = b
_position = index
return b
end
if dpos == -1 then
b = its.find_beginning_of_char_at(b - 1)
_bytepos = b
_position = index
return b
end
if dpos == 0 then return b
var ln = _length
var pos = _position
# Find best insertion point
var delta_begin = index
var delta_end = (ln - 1) - index
var delta_cache = (pos - index).abs
var min = delta_begin
if delta_cache < min then min = delta_cache
if delta_end < min then min = delta_end
var ns_i: Int
var my_i: Int
if min == delta_cache then
ns_i = _bytepos
my_i = pos
else if min == delta_begin then
ns_i = first_byte
my_i = 0
else
ns_i = its.find_beginning_of_char_at(last_byte)
my_i = _length - 1
end
ns_i = its.char_to_byte_index_cached(index, my_i, ns_i)
_position = index
_bytepos = ns_i
return ns_i
end
# By escaping `self` to HTML, how many more bytes will be needed ?
fun chars_to_html_escape: Int do
var its = _items
var max = last_byte
var pos = first_byte
var endlen = 0
while pos <= max do
var c = its[pos]
if c == u'<' then
endlen += 3
else if c == u'>' then
endlen += 3
else if c == u'&' then
endlen += 4
else if c == u'"' then
endlen += 4
else if c == u'\'' then
endlen += 4
else if c == 0x2F then
endlen += 4
end
pos += 1
end
return endlen
end
redef fun html_escape
do
var extra = chars_to_html_escape
if extra == 0 then return to_s
var its = _items
var max = last_byte
var pos = first_byte
var nlen = extra + _byte_length
var nits = new CString(nlen)
var outpos = 0
while pos <= max do
var c = its[pos]
# Special codes:
# Some HTML characters are used as meta-data, they need
# to be replaced by an HTML-Escaped equivalent
if c == u'<' then
nits[outpos] = u'&'
nits[outpos + 1] = u'l'
nits[outpos + 2] = u't'
nits[outpos + 3] = u';'
outpos += 4
else if c == u'>' then
nits[outpos] = u'&'
nits[outpos + 1] = u'g'
nits[outpos + 2] = u't'
nits[outpos + 3] = u';'
outpos += 4
else if c == u'&' then
nits[outpos] = u'&'
nits[outpos + 1] = u'a'
nits[outpos + 2] = u'm'
nits[outpos + 3] = u'p'
nits[outpos + 4] = u';'
outpos += 5
else if c == u'"' then
nits[outpos] = u'&'
nits[outpos + 1] = u'#'
nits[outpos + 2] = u'3'
nits[outpos + 3] = u'4'
nits[outpos + 4] = u';'
outpos += 5
else if c == u'\'' then
nits[outpos] = u'&'
nits[outpos + 1] = u'#'
nits[outpos + 2] = u'3'
nits[outpos + 3] = u'9'
nits[outpos + 4] = u';'
outpos += 5
else if c == u'/' then
nits[outpos] = u'&'
nits[outpos + 1] = u'#'
nits[outpos + 2] = u'4'
nits[outpos + 3] = u'7'
nits[outpos + 4] = u';'
outpos += 5
else
nits[outpos] = c
outpos += 1
end
pos += 1
end
var s = new FlatString.with_infos(nits, nlen, 0)
return s
end
# By escaping `self` to C, how many more bytes will be needed ?
#
# This enables a double-optimization in `escape_to_c` since if this
# method returns 0, then `self` does not need escaping and can be
# returned as-is
fun chars_to_escape_to_c: Int do
var its = _items
var max = last_byte
var pos = first_byte
var req_esc = 0
while pos <= max do
var c = its[pos]
if c == u'\n' then
req_esc += 1
else if c == u'\t' then
req_esc += 1
else if c == u'"' then
req_esc += 1
else if c == u'\'' then
req_esc += 1
else if c == u'\\' then
req_esc += 1
else if c == u'?' then
var j = pos + 1
if j < length then
var next = its[j]
# We ignore `??'` because it will be escaped as `??\'`.
if
next == 0x21 or
next == 0x28 or
next == 0x29 or
next == 0x2D or
next == 0x2F or
next == 0x3C or
next == 0x3D or
next == 0x3E
then req_esc += 1
end
else if c < 32 then
req_esc += 3
end
pos += 1
end
return req_esc
end
redef fun escape_to_c do
var ln_extra = chars_to_escape_to_c
if ln_extra == 0 then return self.to_s
var its = _items
var max = last_byte
var nlen = _byte_length + ln_extra
var nns = new CString(nlen)
var pos = first_byte
var opos = 0
while pos <= max do
var c = its[pos]
# Special codes:
#
# Any byte with value < 32 is a control character
# All their uses will be replaced by their octal
# value in C.
#
# There are two exceptions however:
#
# * 0x09 => \t
# * 0x0A => \n
#
# Aside from the code points above, the following are:
#
# * 0x22 => \"
# * 0x27 => \'
# * 0x5C => \\
if c == u'\t' then
nns[opos] = u'\\'
nns[opos + 1] = u't'
opos += 2
else if c == u'\n' then
nns[opos] = u'\\'
nns[opos + 1] = u'n'
opos += 2
else if c == u'"' then
nns[opos] = u'\\'
nns[opos + 1] = u'"'
opos += 2
else if c == u'\'' then
nns[opos] = u'\\'
nns[opos + 1] = u'\''
opos += 2
else if c == u'\\' then
nns[opos] = u'\\'
nns[opos + 1] = u'\\'
opos += 2
else if c == u'?' then
var j = pos + 1
if j < length then
var next = its[j]
# We ignore `??'` because it will be escaped as `??\'`.
if
next == 0x21 or
next == 0x28 or
next == 0x29 or
next == 0x2D or
next == 0x2F or
next == 0x3C or
next == 0x3D or
next == 0x3E
then
nns[opos] = 0x5C
opos += 1
end
end
nns[opos] = 0x3F
opos += 1
else if c < 32 then
nns[opos] = u'\\'
nns[opos + 1] = u'0'
nns[opos + 2] = ((c & 0x38) >> 3) + u'0'
nns[opos + 3] = (c & 0x07) + u'0'
opos += 4
else
nns[opos] = c
opos += 1
end
pos += 1
end
return nns.to_s_unsafe(nlen, copy=false, clean=false)
end
redef fun [](index) do
var len = _length
# Statistically:
# * ~70% want the next char
# * ~23% want the previous
# * ~7% want the same char
#
# So it makes sense to shortcut early. And early is here.
var dpos = index - _position
var b = _bytepos
if dpos == 1 and index < len - 1 then
var its = _items
var c = its[b]
if c & 0x80 == 0x00 then
# We want the next, and current is easy.
# So next is easy to find!
b += 1
_position = index
_bytepos = b
# The rest will be done by `dpos==0` bellow.
dpos = 0
end
else if dpos == -1 and index > 1 then
var its = _items
var c = its[b-1]
if c & 0x80 == 0x00 then
# We want the previous, and it is easy.
b -= 1
dpos = 0
_position = index
_bytepos = b
return c.code_point
end
end
if dpos == 0 then
# We know what we want (+0 or +1) just get it now!
var its = _items
var c = its[b]
if c & 0x80 == 0x00 then return c.code_point
return items.char_at(b)
end
assert index >= 0 and index < len
return fetch_char_at(index)
end
# Gets a `Char` at `index` in `self`
#
# WARNING: Use at your own risks as no bound-checking is done
fun fetch_char_at(index: Int): Char do
var i = char_to_byte_index(index)
var items = _items
var b = items[i]
if b & 0x80 == 0x00 then return b.code_point
return items.char_at(i)
end
# If `self` contains only digits and alpha <= 'f', return the corresponding integer.
#
# assert "ff".to_hex == 255
redef fun to_hex(pos, ln) do
var res = 0
if pos == null then pos = 0
if ln == null then ln = length - pos
pos = char_to_byte_index(pos)
var its = _items
var max = pos + ln
for i in [pos .. max[ do
res <<= 4
res += its[i].code_point.from_hex
end
return res
end
redef fun copy_to_native(dst, n, src_off, dst_off) do
_items.copy_to(dst, n, first_byte + src_off, dst_off)
end
end
lib/core/text/flat.nit:37,1--406,3
redef class FlatText
redef fun append_to_bytes(b) do
var from = if self isa FlatString then first_byte else 0
if isset _items then b.append_ns_from(items, byte_length, from)
end
end
lib/core/bytes.nit:1015,1--1020,3
redef class FlatText
redef fun encode_base64 do return fast_cstring.encode_base64(byte_length).to_s
redef fun decode_base64 do return fast_cstring.decode_base64(byte_length)
redef fun is_base64 do return fast_cstring.is_base64(byte_length)
redef fun check_base64 do return fast_cstring.check_base64(byte_length)
end
lib/base64/base64.nit:242,1--250,3
redef class FlatText
redef fun char_to_byte_index(index) do
var ln = length
assert index >= 0
assert index < ln
# Find best insertion point
var delta_begin = index
var delta_end = (ln - 1) - index
var delta_cache = (position - index).abs
var min = delta_begin
var its = items
if delta_cache < min then min = delta_cache
if delta_end < min then min = delta_end
var ns_i: Int
var my_i: Int
if min == delta_begin then
ns_i = first_byte
my_i = 0
else if min == delta_cache then
ns_i = bytepos
my_i = position
else
ns_i = its.find_beginning_of_char_at(last_byte)
my_i = length - 1
end
var from = ns_i
ns_i = its.char_to_byte_index_cached(index, my_i, ns_i)
var after = ns_i
sys.index_len.inc((after - from).abs)
position = index
bytepos = ns_i
return ns_i
end
end
lib/text_stat/text_stat.nit:170,1--213,3