core :: U16String :: _code_units
Number of code units actually in `uchar_string`.
core :: U16String :: _uchar_string
Pointer to a `UChar *` string
core :: U16String :: code_units
Number of code units actually in `uchar_string`.
core :: U16String :: code_units=
Number of code units actually in `uchar_string`.
core :: U16String :: defaultinit
Returns an empty `U16String` of capacity `cap` or a NULL `U16String` if no `cap` parameter is provided.
core :: U16String :: from_string
Returns a converted `U16String` from a `String`
core :: U16String :: uchar_string=
Pointer to a `UChar *` string
core :: U16String :: byte_length
Returns the number of UTF-8 code units (bytes) in `self`
core :: U16String :: _code_units
Number of code units actually in `uchar_string`.
core :: Text :: _hash_cache
core :: U16String :: _uchar_string
Pointer to a `UChar *` string
serialization :: Serializable :: accept_inspect_serializer_core
serialization :: Serializable :: accept_json_serializer
Refinable service to customize the serialization of this class to JSON
serialization :: Serializable :: accept_msgpack_attribute_counter
Hook to customize the behavior of the `AttributeCounter`
serialization :: Serializable :: accept_msgpack_serializer
Hook to customize the serialization of this class to MessagePack
serialization :: Serializable :: add_to_bundle
Called by[]=
to dynamically choose the appropriate method according
core :: Text :: apply_format
core :: Text :: binarydigest_to_bytes
Return aBytes
by reading 0 and 1.
core :: Text :: capitalized
Returns a capitalizedself
core :: Text :: chars_to_escape_csv
How many more bytes should be allocated for CSV escaping?
core :: Text :: chars_to_unescape_csv
How many bytes should be removed for CSV unescaping?
core :: Object :: class_factory
Implementation used byget_class
to create the specific class.
core :: U16String :: code_units
Number of code units actually inuchar_string
.
core :: U16String :: code_units=
Number of code units actually inuchar_string
.
core :: Text :: copy_to_native
Copiesn
bytes from self
at src_offset
into dest
starting at dest_offset
serialization :: Serializable :: core_serialize_to
Actual serialization ofself
to serializer
core :: Text :: decode_base64
Decodes the receiver string to base64 using a custom padding character.
core :: Writable :: defaultinit
core :: Cloneable :: defaultinit
core :: Pattern :: defaultinit
core :: Comparable :: defaultinit
core :: Object :: defaultinit
core :: Text :: defaultinit
mpi :: Sendable :: defaultinit
core :: Finalizable :: defaultinit
core :: U16String :: defaultinit
Returns an empty `U16String` of capacity `cap` or a NULL `U16String` if no `cap` parameter is provided.
core :: Text :: deserialize_json
Deserialize anullable Object
from this JSON formatted string
core :: Text :: encode_base64
Encodes the receiver string to base64 using a custom padding character.
core :: Text :: english_scoring
Score `self` according to English's letter frequency.
core :: Text :: escape_more_to_c
Escape additional characters
core :: Text :: escape_to_c
Escape `"`, `\`, `'`, trigraphs and non printable characters using the rules of literal C strings and characters
core :: Text :: escape_to_csv
Escape the content ofself
for inclusion in a CSV document
core :: Text :: escape_to_gettext
core :: Text :: escape_to_js
Escape the content ofself
to pass to JavaScript code
core :: Text :: escape_to_utf16
Returnsself
with all characters escaped with their UTF-16 representation
core :: Text :: file_extension
Return right-most extension (without the dot)
core :: Text :: file_lstat
The status of a file or of a symlink. See POSIX lstat(2).
core :: Finalizable :: finalize
Liberate any resources held byself
before the memory holding self
is freed
serialization :: Serializable :: from_deserializer
Create an instance of this class from thedeserializer
core :: Text :: from_percent_encoding
Decodeself
from percent (or URL) encoding to a clear string
core :: U16String :: from_string
Returns a converted `U16String` from a `String`
core :: Text :: from_utf16_digit
Returns a UTF-16 escape value
core :: Text :: from_utf16_escape
Returns the Unicode char escaped by `self`
core :: Text :: get_numext
Gets the numeric extension (i/u 8/16/32) in `self` if present
core :: Text :: group_exists
Does the operating system know the group namedself
?
core :: Text :: has_substring
Does self have a substringstr
starting from position pos
?
core :: Text :: hash_cache
core :: Text :: hash_cache=
core :: Text :: hexdigest_to_bytes
Returns a newBytes
instance with the digest as content
core :: Text :: html_escape
Escape the characters `<`, `>`, `&`, `"`, `'` and `/` as HTML/XML entity references.
core :: Text :: html_link_prefixes
core :: Text :: http_download
Download the file at URLself
to output_path
with a simple HTTP request
self
core :: Text :: index_of_from
Gets the index of the first occurrence of `c` starting from `pos`
core :: Text :: internal_to_dot
Writes self as a dot file on the hard drive
core :: Text :: is_numeric
Is this string in a valid numeric format compatible with `to_f`?
core :: Object :: is_same_instance
Return true ifself
and other
are the same instance (i.e. same identity).
core :: Object :: is_same_serialized
Isself
the same as other
in a serialization context?
core :: Object :: is_same_type
Return true ifself
and other
have the same dynamic type.
core :: Text :: is_valid_html_tag
core :: Text :: is_whitespace
Is the string non-empty but only made of whitespaces?
core :: Text :: json_need_escape
Does `self` need treatment from JSON to Nit?
core :: Text :: json_to_nit_string
Escapesself
from a JSON string to a Nit string
core :: Text :: last_index_of_from
The index of the last occurrence of an element starting from pos (in reverse order).
core :: Text :: levenshtein_distance
Return the Levenshtein distance between two strings
core :: Text :: light_gray
Make the text appear in light gray (or white) in an ANSI/VT100 terminal.
core :: Text :: meta_from_fence
Extract string found at end of fence opening.
serialization :: Serializable :: msgpack_extra_array_items
Hook to request a larger than usual metadata array
core :: Object :: native_class_name
The class name of the object in CString format.
core :: Object :: output_class_name
Display class name on stdout (debug only).
core :: Text :: parse_bmfont
Parse `self` as an XML BMFont description file
core :: Text :: read_md_link
Read a markdown link address and append it to theout
buffer.
core :: Text :: read_md_link_id
Read a markdown link text and append it to theout
buffer.
core :: Text :: read_raw_until
Readself
as raw text until nend
and append it to the out
buffer.
core :: Text :: read_until
Readself
as raw text until nend
and append it to the out
buffer.
core :: Text :: read_until
Readself
until nend
and append it to the out
buffer.
core :: Text :: read_xml_until
Readself
as XML until to
and append it to the out
buffer.
core :: Text :: remove_all
Returns a copy ofself
minus all occurences of pattern
core :: Text :: replace_first
Replace the first occurrence ofpattern
with string
core :: Text :: run_js_native
core :: Text :: search_all
Search all occurrences ofpattern
into self.
core :: Pattern :: search_all_in
Search allself
occurrences into s
.
core :: Text :: search_from
Search the first occurence ofpattern
after from
.
core :: Pattern :: search_index_in
Searchself
into s
from a certain position.
core :: Text :: search_last
Search the last occurence of the textt
.
core :: Text :: search_last_up_to
Search the last occurence of the textt
before up_to
.
serialization :: Serializable :: serialize_msgpack
Serializeself
to MessagePack bytes
serialization :: Serializable :: serialize_to
Serializeself
to serializer
serialization :: Serializable :: serialize_to_json
Serializeself
to JSON
serialization :: Serializable :: serialize_to_or_delay
Accept references or force direct serialization (usingserialize_to
)
core :: Text :: simplify_path
Simplify a file path by removing useless `.`, removing `//`, and resolving `..`
core :: Text :: skip_spaces
Get the position of the next non-space character.
core :: Text :: split_once_on
Split `self` on the first occurrence of `pattern`
core :: Text :: split_with
@deprecated alias forsplit
core :: Text :: strip_extension
Remove the trailingextension
.
core :: Text :: strip_nullable
Strip thenullable
prefix from the type name self
core :: Text :: strip_nullable_and_params
Strip thenullable
prefix and the params from the type name self
core :: Text :: strip_numext
Removes the numeric extension if present
core :: Text :: strip_numhead
Removes the numeric head of `self` if present
core :: Text :: substring_from
Create a substring fromself
beginning at the from
position
core :: Text :: substrings
Iterates on the substrings of self if any
core :: Text :: to_camel_case
Takes a snake case `self` and converts it to camel case
core :: Text :: to_cmangle
Mangle a string to be a unique string only made of alphanumeric characters and underscores.
core :: Text :: to_percent_encoding
Encode `self` to percent (or URL) encoding
serialization :: Serializable :: to_pretty_json
Serializeself
to plain pretty JSON
core :: Text :: to_program_name
Convert the path (self
) to a program name.
core :: Text :: to_snake_case
Takes a camel caseself
and converts it to snake case
core :: Text :: to_sql_date_string
Format the date represented byself
into an escaped string for SQLite
core :: U16String :: uchar_string=
Pointer to a `UChar *` string
core :: Text :: unescape_csv
Unescape the content ofself
from CSV format to Nit String
core :: Text :: unescape_json
Removes JSON-escaping if necessary in a JSON string
core :: Text :: unescape_nit
Return a string where Nit escape sequences are transformed.
core :: Text :: unescape_to_bytes
Return a `Bytes` instance where Nit escape sequences are transformed.
core :: Text :: user_exists
Does the operating system know the user namedself
?
core :: Text :: write_native_to
core :: Writable :: write_to_bytes
Likewrite_to
but return a new Bytes (may be quite large)
core :: Writable :: write_to_file
Likewrite_to
but take care of creating the file
core :: Writable :: write_to_string
Likewrite_to
but return a new String (may be quite large).
serialization :: DirectSerializable
Instances of this class are not delayed and instead serialized immediatelySerializer::serialize
# UTF-16 encoded string
#
# Wraps a native `UCharString` buffer together with the number of code units
# allocated to it (`capacity`) and the number of code units in use (`code_units`).
class U16String
	super Finalizable
	super Text

	# Pointer to a `UChar *` string
	private var uchar_string: UCharString

	# Number of code units (aka UTF-16 encoded code units or `UChar`) allocated to `uchar_string`
	private var capacity = 0

	# Number of code units actually in `uchar_string`.
	# `code_units` <= `capacity`.
	private var code_units = 0

	# Number of code points in `self`, as reported by `uchar_string.code_points`
	# over the first `code_units` code units.
	redef fun length: Int do return uchar_string.code_points(code_units)

	# Returns an empty `U16String` of capacity `cap` or a NULL `U16String` if no `cap` parameter is provided.
	# The `cap` argument is the number of code units (aka UTF-16 encoded code units or `UChar`) allocated to `uchar_string`.
	# If the number of code units is known in advance, it can be provided with the `units` parameter.
	#
	# `cap` must be non-negative and `units`, when given, must be in `[0..cap]`.
	# `units` is ignored when `cap` is null.
	init (cap: nullable Int, units: nullable Int) do
		if cap == null then
			uchar_string = new UCharString.nul
		else
			assert cap >= 0
			if units != null then
				# Keep the invariant 0 <= code_units <= capacity
				assert units >= 0 and units <= cap
				code_units = units
			end
			uchar_string = new UCharString.empty(cap)
			capacity = cap
		end
	end

	# Returns a converted `U16String` from a `String`
	init from_string(source: String) do
		var csource = source.to_cstring
		var csource_length = source.byte_length
		# First call on a NULL buffer with a capacity of 0: only the returned
		# required length is used, to size the real buffer.
		uchar_string = new UCharString.nul
		var required_length = uchar_string.from_cstring(0, csource, csource_length)
		# Second call performs the conversion into the allocated buffer.
		uchar_string = new UCharString.empty(required_length)
		uchar_string.from_cstring(required_length, csource, csource_length)
		capacity = required_length
		code_units = source.u16_length
	end

	# Copies the characters of `source` to `self`.
	# A maximum of `self.capacity` code units will be copied to `self`.
	# If a code point >0xFFFF has to be divided, it will not be copied.
	fun copy_from(source: String) do
		uchar_string.from_cstring(capacity, source.to_cstring, source.byte_length)
		# No more than `capacity` code units can have been written, so never
		# record more than that (keeps `code_units` <= `capacity`).
		# NOTE(review): when the copy stops before a code point >0xFFFF that does
		# not fit, the real count may be `capacity - 1` -- confirm against
		# `UCharString::from_cstring`.
		var units = source.u16_length
		if units > capacity then units = capacity
		code_units = units
	end

	redef fun chars do return new U16StringCharView(self)

	# Returns the code point at position `index` (counted in code points).
	#
	# Walks the buffer from the start, because a code point >0xFFFF occupies
	# two code units and the offset of `index` cannot be computed directly.
	redef fun [](index: Int): Char do
		assert index >= 0 and index < length
		var offset = 0
		var c = '\0'
		# Inclusive range: after the last iteration `c` is the code point at `index`
		for i in [0..index] do
			c = uchar_string.char_at_offset(offset, code_units)
			if c.to_i > 0xFFFF then offset += 2 else offset += 1
		end
		return c
	end

	# Converts `self` to a newly allocated `CString`.
	redef fun to_cstring: CString do
		# First call on a NULL destination with a capacity of 0: only the
		# returned required length is used.
		var cself = new CString.nul
		var required_length = uchar_string.to_cstring(cself, 0, code_units)
		# One extra byte is allocated beyond the required length
		# (presumably for the terminating NUL).
		cself = new CString(required_length + 1)
		uchar_string.to_cstring(cself, required_length + 1, code_units)
		return cself
	end

	# Returns the number of UTF-8 code units (bytes) in `self`
	#
	# Each code point contributes its UTF-8 encoded size:
	# 1 byte up to U+007F, 2 bytes up to U+07FF, 3 bytes up to U+FFFF
	# and 4 bytes up to U+10FFFF. Values above U+10FFFF contribute nothing.
	redef fun byte_length: Int do
		var offset = 0
		var l = 0
		for i in chars do
			var b = uchar_string.char_at_offset(offset, code_units).to_i
			# A code point >0xFFFF is stored as two UTF-16 code units
			if b > 0xFFFF then offset += 2 else offset += 1
			if b <= 0x7F then
				l += 1
			else if b <= 0x7FF then
				l += 2
			else if b <= 0xFFFF then
				l += 3
			else if b <= 0x10FFFF then
				l += 4
			end
		end
		return l
	end

	redef fun to_s: String do return to_cstring.to_s_with_length(byte_length)

	# Frees the native `uchar_string` buffer.
	redef fun finalize do uchar_string.free
end
lib/core/text/u16_string.nit:30,1--142,3