Codec supporting UTF-8

Introduced properties

Redefined properties

redef type SELF: UTF8Codec

core $ UTF8Codec :: SELF

Type of this instance, automatically specialized in every class
redef fun add_char_to(c: Char, stream: CString): Int

core $ UTF8Codec :: add_char_to

Adds a char c to bytes stream
redef fun add_string_to(s: Text, b: Bytes): Int

core $ UTF8Codec :: add_string_to

Adds a string s coded as the supported encoding to b
redef fun char_max_size: Int

core $ UTF8Codec :: char_max_size

Maximum size of a character in supported encoding
redef fun codet_size: Int

core $ UTF8Codec :: codet_size

Size of a codet for the target encoding
redef fun decode_char(b: CString): Char

core $ UTF8Codec :: decode_char

Decodes a char from b to a Unicode code-point
redef fun decode_string(ns: CString, len: Int): String

core $ UTF8Codec :: decode_string

Decodes a string ns to UTF-8
redef fun encode_char(c: Char): CString

core $ UTF8Codec :: encode_char

Transforms c to its representation in the format of self
redef fun encode_string(s: Text): Bytes

core $ UTF8Codec :: encode_string

Transforms s to the format of self
redef fun is_valid_char(ns: CString, len: Int): Int

core $ UTF8Codec :: is_valid_char

Is the sequence of bytes in ns at position a valid Char ?
redef fun max_lookahead: Int

core $ UTF8Codec :: max_lookahead

How many lookaheads might be required to decode a single char ?

All properties

fun !=(other: nullable Object): Bool

core :: Object :: !=

Do self and other have different values?
fun ==(other: nullable Object): Bool

core :: Object :: ==

Do self and other have the same value?
type CLASS: Class[SELF]

core :: Object :: CLASS

The type of the class of self.
type SELF: Object

core :: Object :: SELF

Type of this instance, automatically specialized in every class
abstract fun add_char_to(c: Char, s: CString): Int

core :: Codec :: add_char_to

Adds a char c to bytes s
abstract fun add_string_to(s: Text, b: Bytes): Int

core :: Codec :: add_string_to

Adds a string s coded as the supported encoding to b
abstract fun char_max_size: Int

core :: Codec :: char_max_size

Maximum size of a character in supported encoding
protected fun class_factory(name: String): CLASS

core :: Object :: class_factory

Implementation used by get_class to create the specific class.
fun class_name: String

core :: Object :: class_name

The class name of the object.
abstract fun codet_size: Int

core :: Codec :: codet_size

Size of a codet for the target encoding
abstract fun decode_char(b: CString): Char

core :: Codec :: decode_char

Decodes a char from b to a Unicode code-point
abstract fun decode_string(b: CString, len: Int): String

core :: Codec :: decode_string

Decodes a string b to UTF-8
abstract fun encode_char(c: Char): CString

core :: Codec :: encode_char

Transforms c to its representation in the format of self
abstract fun encode_string(s: Text): Bytes

core :: Codec :: encode_string

Transforms s to the format of self
fun get_class: CLASS

core :: Object :: get_class

The meta-object representing the dynamic type of self.
fun hash: Int

core :: Object :: hash

The hash code of the object.
init init

core :: Object :: init

fun inspect: String

core :: Object :: inspect

Developer readable representation of self.
protected fun inspect_head: String

core :: Object :: inspect_head

Return "CLASSNAME:#OBJECTID".
intern fun is_same_instance(other: nullable Object): Bool

core :: Object :: is_same_instance

Return true if self and other are the same instance (i.e. same identity).
fun is_same_serialized(other: nullable Object): Bool

core :: Object :: is_same_serialized

Is self the same as other in a serialization context?
intern fun is_same_type(other: Object): Bool

core :: Object :: is_same_type

Return true if self and other have the same dynamic type.
abstract fun is_valid_char(ns: CString, position: Int): Int

core :: Codec :: is_valid_char

Is the sequence of bytes in ns at position a valid Char ?
abstract fun max_lookahead: Int

core :: Codec :: max_lookahead

How many lookaheads might be required to decode a single char ?
private intern fun native_class_name: CString

core :: Object :: native_class_name

The class name of the object in CString format.
intern fun object_id: Int

core :: Object :: object_id

An internal hash code for the object based on its identity.
fun output

core :: Object :: output

Display self on stdout (debug only).
intern fun output_class_name

core :: Object :: output_class_name

Display class name on stdout (debug only).
fun serialization_hash: Int

core :: Object :: serialization_hash

Hash value used for serialization
intern fun sys: Sys

core :: Object :: sys

Return the global sys object, the only instance of the Sys class.
abstract fun to_jvalue(env: JniEnv): JValue

core :: Object :: to_jvalue

fun to_s: String

core :: Object :: to_s

User readable representation of self.
package_diagram core::utf8::UTF8Codec UTF8Codec core::Codec Codec core::utf8::UTF8Codec->core::Codec core::Object Object core::Codec->core::Object ...core::Object ... ...core::Object->core::Object

Ancestors

interface Object

core :: Object

The root of the class hierarchy.

Parents

abstract class Codec

core :: Codec

Codes/Decodes entities from/to UTF-8

Class definitions

core $ UTF8Codec
# Codec for the UTF-8 encoding
private class UTF8Codec
	super Codec

	# Reports 4 as the maximum size of an encoded character
	redef fun char_max_size
	do
		return 4
	end

	# Reports 1 as the size of a codet
	redef fun codet_size
	do
		return 1
	end

	# Reports 4 as the maximum lookahead needed to decode one character
	redef fun max_lookahead
	do
		return 4
	end

	# Encodes `c` into a freshly allocated `CString` of `c.u8char_len` bytes
	redef fun encode_char(c)
	do
		var encoded = new CString(c.u8char_len)
		add_char_to(c, encoded)
		return encoded
	end

	# Writes `c` into `stream` through `u8char_tos`
	#
	# Returns `c.u8char_len`.
	redef fun add_char_to(c, stream)
	do
		var width = c.u8char_len
		c.u8char_tos(stream, width)
		return width
	end

	# Encodes `s` into a new `Bytes` pre-sized to `s.byte_length`
	redef fun encode_string(s)
	do
		var encoded = new Bytes.with_capacity(s.byte_length)
		add_string_to(s, encoded)
		return encoded
	end

	# Appends `s` to `b`
	#
	# Returns `s.byte_length`.
	redef fun add_string_to(s, b)
	do
		s.append_to_bytes(b)
		var appended = s.byte_length
		return appended
	end

	# Checks the `len` first bytes of `ns` as a single UTF-8 sequence
	#
	# Returns:
	#
	# * `2` if `len` is 0, if the first byte is not a valid UTF-8 start,
	#   or if one of the following bytes is not of the form `10xxxxxx`
	# * `1` if `len` differs from the length announced by the first byte
	# * `0` otherwise
	redef fun is_valid_char(ns, len)
	do
		if len == 0 then return 2
		var lead = ns[0]
		if not lead.is_valid_utf8_start then return 2
		# Every byte after the first must carry the `10` continuation marker
		var i = 1
		while i < len do
			if ns[i] & 0b1100_0000 != 0b1000_0000 then return 2
			i += 1
		end
		if len != lead.u8len then return 1
		return 0
	end

	# Reads the character at the start of `b`
	#
	# Code points in `0xD800..0xDFFF`, as well as `0xFFFE` and `0xFFFF`,
	# are replaced by the character of code point `0xFFFD`.
	redef fun decode_char(b)
	do
		var decoded = b.char_at(0)
		var cp = decoded.code_point
		var in_surrogate_range = cp >= 0xD800 and cp <= 0xDFFF
		var is_rejected_value = cp == 0xFFFE or cp == 0xFFFF
		if in_surrogate_range or is_rejected_value then return 0xFFFD.code_point
		return decoded
	end

	# Builds a `String` from the `len` first bytes of `ns`
	#
	# `len` must not be negative.
	redef fun decode_string(ns, len)
	do
		assert len >= 0
		var str = ns.to_s_unsafe(len, copy=false)
		var backing = str.as(FlatString).items
		# A string backed by something other than `ns` can be returned as is
		if backing != ns then return str
		# Otherwise, rebuild the string over a fresh copy of the bytes,
		# presumably so that the result does not alias the caller's buffer
		var duplicate = new CString(len)
		backing.copy_to(duplicate, len, 0, 0)
		return duplicate.to_s_unsafe(str.byte_length, str.length, copy=false)
	end
end
lib/core/codecs/utf8.nit:22,1--81,3