core :: U16String :: defaultinit
Returns an empty U16String of capacity cap or a NULL U16String if no cap parameter is provided.
core :: U16String :: from_string
Returns a converted U16String from a String
core $ U16String :: byte_length
Returns the number of UTF-8 code units (bytes) in self
serialization :: Serializable :: accept_json_serializer
Refinable service to customize the serialization of this class to JSON
serialization :: Serializable :: accept_msgpack_attribute_counter
Hook to customize the behavior of the AttributeCounter
serialization :: Serializable :: accept_msgpack_serializer
Hook to customize the serialization of this class to MessagePack
serialization :: Serializable :: add_to_bundle
Called by []= to dynamically choose the appropriate method according
core :: Text :: binarydigest_to_bytes
Return a Bytes by reading 0 and 1.
core :: Text :: capitalized
Returns a capitalized self
core :: Object :: class_factory
Implementation used by get_class to create the specific class.
core :: Text :: copy_to_native
Copies n bytes from self at src_offset into dest starting at dest_offset
serialization :: Serializable :: core_serialize_to
Actual serialization of self to serializer
core :: Text :: decode_base64
Decodes the receiver string from base64 using a custom padding character.
core :: Object :: defaultinit
core :: Finalizable :: defaultinit
core :: U16String :: defaultinit
Returns an empty U16String of capacity cap or a NULL U16String if no cap parameter is provided.
core :: Writable :: defaultinit
mpi :: Sendable :: defaultinit
core :: Cloneable :: defaultinit
core :: Text :: defaultinit
core :: Pattern :: defaultinit
core :: Comparable :: defaultinit
core :: Text :: deserialize_json
Deserialize a nullable Object from this JSON formatted string
core :: Text :: encode_base64
Encodes the receiver string to base64 using a custom padding character.
core :: Text :: english_scoring
Score self according to English's letter frequency.
core :: Text :: escape_more_to_c
Escape additional characters
core :: Text :: escape_to_c
Escape " \ ', trigraphs and non printable characters using the rules of literal C strings and characters
core :: Text :: escape_to_js
Escape the content of self to pass to JavaScript code
core :: Text :: escape_to_utf16
Returns self with all characters escaped with their UTF-16 representation
core :: Text :: file_extension
Return right-most extension (without the dot)
core :: Text :: file_lstat
The status of a file or of a symlink. see POSIX lstat(2).
core :: Finalizable :: finalize
Liberate any resources held by self before the memory holding self is freed
serialization :: Serializable :: from_deserializer
Create an instance of this class from the deserializer
core :: Text :: from_percent_encoding
Decode self from percent (or URL) encoding to a clear string
core :: U16String :: from_string
Returns a converted U16String from a String
core :: Text :: from_utf16_digit
Returns a UTF-16 escape value
core :: Text :: from_utf16_escape
Returns the Unicode char escaped by self
core :: Text :: group_exists
Does the operating system know the group named self?
core :: Text :: has_substring
Does self have a substring str starting from position pos?
core :: Text :: hexdigest_to_bytes
Returns a new Bytes instance with the digest as content
core :: Text :: html_escape
Escape the characters <, >, &, ", ' and / as HTML/XML entity references.
core :: Text :: http_download
Download the file at URL self to output_path with a simple HTTP request
core :: Text :: index_of_from
Gets the index of the first occurrence of `c` starting from `pos`
core :: Text :: is_numeric
Is this string in a valid numeric format compatible with to_f?
core :: Object :: is_same_instance
Return true if self and other are the same instance (i.e. same identity).
core :: Object :: is_same_serialized
Is self the same as other in a serialization context?
core :: Object :: is_same_type
Return true if self and other have the same dynamic type.
core :: Text :: is_whitespace
Is the string non-empty but only made of whitespaces?
core :: Text :: last_index_of_from
The index of the last occurrence of an element starting from pos (in reverse order).
core :: Text :: levenshtein_distance
Return the Levenshtein distance between two strings
core :: Text :: light_gray
Make the text appear in light gray (or white) in an ANSI/VT100 terminal.
serialization :: Serializable :: msgpack_extra_array_items
Hook to request a larger than usual metadata array
core :: Object :: output_class_name
Display class name on stdout (debug only).
core :: Text :: parse_bmfont
Parse self as an XML BMFont description file
core :: Text :: remove_all
Returns a copy of self minus all occurrences of pattern
core :: Text :: replace_first
Replace the first occurrence of pattern with string
core :: Text :: search_all
Search all occurrences of pattern into self.
core :: Pattern :: search_all_in
Search all self occurrences into s.
core :: Text :: search_from
Search the first occurrence of pattern after from.
core :: Pattern :: search_index_in
Search self into s from a certain position.
core :: Text :: search_last
Search the last occurrence of the text t.
core :: Text :: search_last_up_to
Search the last occurrence of the text t before up_to.
serialization :: Serializable :: serialize_msgpack
Serialize self to MessagePack bytes
serialization :: Serializable :: serialize_to
Serialize self to serializer
serialization :: Serializable :: serialize_to_json
Serialize self to JSON
core :: Text :: simplify_path
Simplify a file path by removing useless ., removing //, and resolving ..
core :: Text :: split_once_on
Split self on the first occurrence of pattern
core :: Text :: split_with
@deprecated alias for split
core :: Text :: strip_extension
Remove the trailing extension.
core :: Text :: strip_nullable
Strip the nullable prefix from the type name self
core :: Text :: strip_nullable_and_params
Strip the nullable prefix and the params from the type name self
core :: Text :: substring_from
Create a substring from self beginning at the from position
core :: Text :: to_camel_case
Takes a snake case self and converts it to camel case
core :: Text :: to_cmangle
Mangle a string to be a unique string only made of alphanumeric characters and underscores.
core :: Text :: to_percent_encoding
Encode self to percent (or URL) encoding
serialization :: Serializable :: to_pretty_json
Serialize self to plain pretty JSON
core :: Text :: to_program_name
Convert the path (self) to a program name.
core :: Text :: to_snake_case
Takes a camel case self and converts it to snake case
core :: Text :: to_sql_date_string
Format the date represented by self into an escaped string for SQLite
core :: Text :: unescape_json
Removes JSON-escaping if necessary in a JSON string
core :: Text :: unescape_nit
Return a string where Nit escape sequences are transformed.
core :: Text :: unescape_to_bytes
Return a Bytes instance where Nit escape sequences are transformed.
core :: Text :: user_exists
Does the operating system know the user named self?
core :: Writable :: write_to_bytes
Like write_to but return a new Bytes (may be quite large)
core :: Writable :: write_to_file
Like write_to but take care of creating the file
core :: Writable :: write_to_string
Like write_to but return a new String (may be quite large).
serialization :: DirectSerializable
Instances of this class are not delayed and instead serialized immediately
Serializer::serialize
# UTF-16 encoded string
#
# Wraps a `UCharString` buffer together with its allocated capacity and the
# number of UTF-16 code units currently in use.
class U16String
	super Finalizable
	super Text

	# Pointer to a `UChar *` string
	private var uchar_string: UCharString

	# Number of code units (aka UTF-16 encoded code units or `UChar`) allocated to `uchar_string`
	private var capacity = 0

	# Number of code units actually in `uchar_string`.
	# `code_units` <= `capacity`.
	private var code_units = 0

	# Number of code points in `self`, as reported by `UCharString::code_points`
	# over the first `code_units` code units.
	redef fun length: Int do return uchar_string.code_points(code_units)

	# Returns an empty `U16String` of capacity `cap` or a NULL `U16String` if no `cap` parameter is provided.
	# The `cap` argument is the number of code units (aka UTF-16 encoded characters or `UChar`) allocated to `uchar_string`.
	# If the number of code units is known in advance, it can be provided with the `units` parameter.
	#
	# `cap` must not be negative and `units`, when given, must be in `[0..cap]`.
	# `units` is ignored when `cap` is null.
	init (cap: nullable Int, units: nullable Int) do
		if cap == null then
			uchar_string = new UCharString.nul
		else
			assert cap >= 0
			if units != null then
				# A negative count would break every offset computation below.
				assert units >= 0 and units <= cap
				code_units = units
			end
			uchar_string = new UCharString.empty(cap)
			capacity = cap
		end
	end

	# Returns a converted `U16String` from a `String`
	init from_string(source: String) do
		var csource = source.to_cstring
		var csource_length = source.byte_length
		# First pass with a zero capacity on a NULL buffer: only the returned
		# length is used, to size the real allocation.
		uchar_string = new UCharString.nul
		var required_length = uchar_string.from_cstring(0, csource, csource_length)
		# Second pass performs the conversion into the freshly allocated buffer.
		uchar_string = new UCharString.empty(required_length)
		uchar_string.from_cstring(required_length, csource, csource_length)
		capacity = required_length
		code_units = source.u16_length
	end

	# Copies the characters of `source` to `self`.
	# A maximum of `self.capacity` code units will be copied to `self`.
	# If a code point >0xFFFF has to be divided, it will not be copied.
	fun copy_from(source: String) do
		uchar_string.from_cstring(capacity, source.to_cstring, source.byte_length)
		# Never claim more code units than the buffer holds, so that the
		# `code_units <= capacity` invariant survives a truncated copy.
		# NOTE(review): when truncation drops a split surrogate pair the real
		# count is presumably `capacity - 1`; confirm against `from_cstring`.
		var units = source.u16_length
		if units > capacity then units = capacity
		code_units = units
	end

	redef fun chars do return new U16StringCharView(self)

	# Returns the code point at position `index` (counted in code points).
	#
	# The buffer is walked from the start because code points above 0xFFFF
	# take two code units, so the offset of `index` is not known in advance.
	redef fun[](index: Int): Char do
		assert index >= 0 and index < length
		var offset = 0
		var c = '\0'
		# Inclusive range: the last iteration reads the requested code point.
		for i in [0..index] do
			c = uchar_string.char_at_offset(offset, code_units)
			if c.to_i > 0xFFFF then offset += 2 else offset += 1
		end
		return c
	end

	# Returns a newly allocated `CString` holding the conversion of `self`.
	redef fun to_cstring: CString do
		# First pass with a NULL destination and zero capacity: only the
		# returned length is used.
		var cself = new CString.nul
		var required_length = uchar_string.to_cstring(cself, 0, code_units)
		# One extra byte is allocated on top of the reported length.
		cself = new CString(required_length + 1)
		uchar_string.to_cstring(cself, required_length + 1, code_units)
		return cself
	end

	# Returns the number of UTF-8 code units (bytes) in `self`
	#
	# Surrogate code points (0xD800..0xDFFF) and values above 0x10FFFF have no
	# UTF-8 encoding and contribute 0 bytes.
	redef fun byte_length: Int do
		var offset = 0
		var l = 0
		# One iteration per code point.
		for i in [0..length[ do
			var b = uchar_string.char_at_offset(offset, code_units).to_i
			# Code points above the BMP occupy two UTF-16 code units.
			if b > 0xFFFF then offset += 2 else offset += 1
			if b <= 0x7F then
				l += 1
			else if b <= 0x7FF then
				l += 2
			else if (b >= 0xD800 and b <= 0xDFFF) or b > 0x10FFFF then
				l += 0
			else if b <= 0xFFFF then
				# 0x800..0xD7FF and 0xE000..0xFFFF
				l += 3
			else
				l += 4
			end
		end
		return l
	end

	redef fun to_s: String do return to_cstring.to_s_with_length(byte_length)

	# Frees the native buffer held by `uchar_string`.
	redef fun finalize do uchar_string.free
end
lib/core/text/u16_string.nit:30,1--142,3