All kinds of array-based text representations.

# Common base of the text representations backed by a flat native array.
abstract class FlatText
	super Text

	# Native buffer (`char*`) that stores the content of the text
	#
	# Warning: declared `noinit`, so it might be unset in some subclasses;
	# make sure it is set before using it.
	var items: CString is noinit

	# Gives a `char*` that starts at position `first_byte`
	#
	# WARNING: read the following before relying on this service.
	#
	# Strings and CString are *ideally* always allocated through a Garbage Collector.
	# The GC tracks the pointer to the beginning of the `char*`, so the memory may be
	# released at any moment, which makes the pointer returned here invalid.
	# Accessing freed memory is very likely to cause undefined behaviour or a crash,
	# and such failures are long and painful to debug.
	#
	# The only safe usage is an ephemeral one (e.g. the pointer is read in a
	# C function that returns immediately afterwards).
	#
	# As always, never modify the content of the String from C code: Nit Strings
	# are immutable, so copy the `char*` locally if a modification is needed.
	fun fast_cstring: CString is abstract

	redef var length = 0

	redef var byte_length = 0

	# Calls `output` on each of the first `length` elements of `items`, in order.
	redef fun output
	do
		for idx in [0 .. length[ do items[idx].output
	end

	# Forwards the copy request, with unchanged arguments, to `items.copy_to`.
	redef fun copy_to_native(dest, n, src_offset, dest_offset)
	do
		items.copy_to(dest, n, src_offset, dest_offset)
	end
end

lib/core/text/abstract_text.nit:1400,1--1443,3

core :: flat $ FlatText

redef class FlatText

	# Offset in the CString of the first byte of `self`
	#
	# This implementation always answers 0.
	protected fun first_byte: Int
	do
		return 0
	end

	# Offset in the CString of the last byte of `self`
	#
	# Computed from `first_byte` and `_byte_length`; for an empty text
	# the result is `first_byte - 1`.
	protected fun last_byte: Int
	do
		var past_the_end = first_byte + _byte_length
		return past_the_end - 1
	end

	# Cache of the latest position (in characters) explored in the string
	#
	# Updated together with `bytepos` by the character lookups
	# (`char_to_byte_index` and `[]`). Starts at 0.
	var position: Int = 0

	# Cached position (in bytes) in the CString underlying the String
	#
	# Byte-level counterpart of `position`. Starts at 0.
	var bytepos: Int = 0

	# Byte offset in `_items` of the character at position `index`
	#
	# The search starts from the closest known anchor: the first character,
	# the last character, or the character cached by the previous lookup.
	# The cache (`_position` and `_bytepos`) is then moved to `index`,
	# unless `index` already is the cached position.
	fun char_to_byte_index(index: Int): Int do
		var bytes = _items
		var cur = _bytepos
		var shift = index - _position

		# Same character as the cached one: nothing to compute.
		if shift == 0 then return cur

		# Direct neighbours of the cached character are resolved in place.
		if shift == 1 or shift == -1 then
			if shift == -1 then
				cur = bytes.find_beginning_of_char_at(cur - 1)
			else if bytes[cur] & 0x80 == 0x00 then
				# High bit clear: the cached character takes a single byte.
				cur += 1
			else
				cur += bytes.length_of_char_at(cur)
			end
			_bytepos = cur
			_position = index
			return cur
		end

		# General case: measure the distance to each anchor.
		var cached_pos = _position
		var last_char = _length - 1
		var from_start = index
		var from_end = last_char - index
		var from_cache = (cached_pos - index).abs

		var best = from_start
		if from_cache < best then best = from_cache
		if from_end < best then best = from_end

		# On ties, the cache wins over the start, which wins over the end.
		var anchor_byte: Int
		var anchor_char: Int
		if best == from_cache then
			anchor_byte = _bytepos
			anchor_char = cached_pos
		else if best == from_start then
			anchor_byte = first_byte
			anchor_char = 0
		else
			anchor_byte = bytes.find_beginning_of_char_at(last_byte)
			anchor_char = last_char
		end

		var found = bytes.char_to_byte_index_cached(index, anchor_char, anchor_byte)

		_position = index
		_bytepos = found

		return found
	end

	# Number of extra bytes required to escape `self` to HTML
	#
	# Answers 0 when no byte of `self` is one of `<`, `>`, `&`, `"`, `'` or `/`.
	fun chars_to_html_escape: Int do
		var bytes = _items
		var stop = last_byte
		var extra = 0
		var i = first_byte
		while i <= stop do
			var b = bytes[i]
			if b == u'<' or b == u'>' then
				# One byte becomes four
				extra += 3
			else if b == u'&' or b == u'"' or b == u'\'' or b == u'/' then
				# One byte becomes five
				extra += 4
			end
			i += 1
		end
		return extra
	end

	# Copy of `self` where `<`, `>`, `&`, `"`, `'` and `/` are HTML-escaped
	#
	# When `chars_to_html_escape` reports that nothing has to be escaped,
	# `to_s` is returned instead of building a new string.
	redef fun html_escape
	do
		var extra = chars_to_html_escape
		if extra == 0 then return to_s
		var src = _items
		var stop = last_byte
		var r = first_byte
		var nlen = extra + _byte_length
		var out = new CString(nlen)
		var w = 0
		while r <= stop do
			var c = src[r]
			# Bytes that are HTML meta-data are replaced by an entity,
			# every other byte is copied unchanged.
			if c == u'<' or c == u'>' then
				# `&lt;` or `&gt;`
				out[w] = u'&'
				out[w + 1] = if c == u'<' then u'l' else u'g'
				out[w + 2] = u't'
				out[w + 3] = u';'
				w += 4
			else if c == u'&' then
				# `&amp;`
				out[w] = u'&'
				out[w + 1] = u'a'
				out[w + 2] = u'm'
				out[w + 3] = u'p'
				out[w + 4] = u';'
				w += 5
			else if c == u'"' or c == u'\'' or c == u'/' then
				# `&#34;`, `&#39;` or `&#47;`: the two digits are the
				# decimal value of the byte (34, 39 or 47).
				out[w] = u'&'
				out[w + 1] = u'#'
				out[w + 2] = u'0' + c / 10
				out[w + 3] = u'0' + c % 10
				out[w + 4] = u';'
				w += 5
			else
				out[w] = c
				w += 1
			end
			r += 1
		end
		return new FlatString.with_infos(out, nlen, 0)
	end

	# By escaping `self` to C, how many more bytes will be needed ?
	#
	# This enables a double-optimization in `escape_to_c` since if this
	# method returns 0, then `self` does not need escaping and can be
	# returned as-is
	#
	# The count has to match exactly what `escape_to_c` writes, since it
	# is used there to size the output buffer; keep both methods in sync.
	fun chars_to_escape_to_c: Int do
		var its = _items
		var max = last_byte
		var pos = first_byte
		var req_esc = 0
		while pos <= max do
			var c = its[pos]
			if c == u'\n' then
				req_esc += 1
			else if c == u'\t' then
				req_esc += 1
			else if c == u'"' then
				req_esc += 1
			else if c == u'\'' then
				req_esc += 1
			else if c == u'\\' then
				req_esc += 1
			else if c == u'?' then
				var j = pos + 1
				# `j` is a byte offset in `its`: it must be bounded by the
				# last byte of `self`, not by the number of characters
				# (they differ with multibyte characters or when
				# `first_byte` is not 0).
				if j <= max then
					var next = its[j]
					# We ignore `??'` because it will be escaped as `??\'`.
					if
						next == 0x21 or
						next == 0x28 or
						next == 0x29 or
						next == 0x2D or
						next == 0x2F or
						next == 0x3C or
						next == 0x3D or
						next == 0x3E
					then req_esc += 1
				end
			else if c < 32 then
				# `\0oo` takes four bytes instead of one
				req_esc += 3
			end
			pos += 1
		end
		return req_esc
	end

	# Copy of `self` where the bytes that are special in a C literal are escaped
	#
	# When `chars_to_escape_to_c` answers 0, `self.to_s` is returned without
	# building a new string. Otherwise a buffer of exactly
	# `_byte_length + chars_to_escape_to_c` bytes is filled.
	redef fun escape_to_c do
		var ln_extra = chars_to_escape_to_c
		if ln_extra == 0 then return self.to_s
		var its = _items
		var max = last_byte
		var nlen = _byte_length + ln_extra
		var nns = new CString(nlen)
		var pos = first_byte
		var opos = 0
		while pos <= max do
			var c = its[pos]
			# Special codes:
			#
			# Any byte with value < 32 is a control character
			# All their uses will be replaced by their octal
			# value in C.
			#
			# There are two exceptions however:
			#
			# * 0x09 => \t
			# * 0x0A => \n
			#
			# Aside from the code points above, the following are:
			#
			# * 0x22 => \"
			# * 0x27 => \'
			# * 0x5C => \\
			#
			# A `?` is also prefixed with a backslash when the next byte
			# could complete a trigraph.
			if c == u'\t' then
				nns[opos] = u'\\'
				nns[opos + 1] = u't'
				opos += 2
			else if c == u'\n' then
				nns[opos] = u'\\'
				nns[opos + 1] = u'n'
				opos += 2
			else if c == u'"' then
				nns[opos] = u'\\'
				nns[opos + 1] = u'"'
				opos += 2
			else if c == u'\'' then
				nns[opos] = u'\\'
				nns[opos + 1] = u'\''
				opos += 2
			else if c == u'\\' then
				nns[opos] = u'\\'
				nns[opos + 1] = u'\\'
				opos += 2
			else if c == u'?' then
				var j = pos + 1
				# Same bound as in `chars_to_escape_to_c`: `j` is a byte
				# offset, so compare it to the last byte of `self`.
				if j <= max then
					var next = its[j]
					# We ignore `??'` because it will be escaped as `??\'`.
					if
						next == 0x21 or
						next == 0x28 or
						next == 0x29 or
						next == 0x2D or
						next == 0x2F or
						next == 0x3C or
						next == 0x3D or
						next == 0x3E
					then
						nns[opos] = 0x5C
						opos += 1
					end
				end
				nns[opos] = 0x3F
				opos += 1
			else if c < 32 then
				# Octal escape `\0oo`: `c < 32` fits in two octal digits
				nns[opos] = u'\\'
				nns[opos + 1] = u'0'
				nns[opos + 2] = ((c & 0x38) >> 3) + u'0'
				nns[opos + 3] = (c & 0x07) + u'0'
				opos += 4
			else
				nns[opos] = c
				opos += 1
			end
			pos += 1
		end
		return nns.to_s_unsafe(nlen, copy=false, clean=false)
	end

	# Character at position `index` of `self`
	#
	# Lookups next to the cached position (`_position`/`_bytepos`) are
	# served by shortcuts; every other lookup goes through `fetch_char_at`.
	# `index` must satisfy `0 <= index < length`.
	redef fun [](index) do
		var len = _length

		# Statistically:
		# * ~70% want the next char
		# * ~23% want the previous
		# * ~7% want the same char
		#
		# So it makes sense to shortcut early. And early is here.
		var dpos = index - _position
		var b = _bytepos
		if dpos == 1 and index < len - 1 then
			var its = _items
			var c = its[b]
			if c & 0x80 == 0x00 then
				# We want the next, and current is easy.
				# So next is easy to find!
				b += 1
				_position = index
				_bytepos = b
				# The rest will be done by `dpos == 0` below.
				dpos = 0
			end
		else if dpos == -1 and index > 1 then
			var its = _items
			var c = its[b-1]
			if c & 0x80 == 0x00 then
				# We want the previous, and it is easy.
				b -= 1
				_position = index
				_bytepos = b
				return c.code_point
			end
		end
		if dpos == 0 then
			# The cached position is not a proof that `index` is valid:
			# on an empty text the cache is 0 while no character exists.
			# Apply the same bound check as the general case below.
			assert index >= 0 and index < len
			# We know what we want (+0 or +1) just get it now!
			var its = _items
			var c = its[b]
			if c & 0x80 == 0x00 then return c.code_point
			return its.char_at(b)
		end

		assert index >= 0 and index < len
		return fetch_char_at(index)
	end

	# Reads the `Char` found at character position `index` of `self`
	#
	# WARNING: no bound-checking is done here, use at your own risks
	fun fetch_char_at(index: Int): Char do
		var at = char_to_byte_index(index)
		var its = _items
		var lead = its[at]
		# High bit set: let `char_at` handle the byte sequence
		if lead & 0x80 != 0x00 then return its.char_at(at)
		return lead.code_point
	end

	# Integer value of `self` read as hexadecimal digits (digits and alpha <= 'f')
	#
	# `pos` is the character position of the first digit (0 when null) and
	# `ln` the number of bytes to read (what remains after `pos` when null).
	#
	#     assert "ff".to_hex == 255
	redef fun to_hex(pos, ln) do
		var first_char = pos or else 0
		var count = ln or else (length - first_char)
		var cur = char_to_byte_index(first_char)
		var its = _items
		var stop = cur + count
		var acc = 0
		while cur < stop do
			# Each digit contributes four bits
			acc = (acc << 4) + its[cur].code_point.from_hex
			cur += 1
		end
		return acc
	end

	# Copies `n` bytes from `_items` to `dst` through `CString::copy_to`
	#
	# `src_off` is relative to `self`, so it is shifted by `first_byte`
	# to get the offset in `_items`; `dst_off` is passed unchanged.
	redef fun copy_to_native(dst, n, src_off, dst_off)
	do
		var real_src_off = first_byte + src_off
		_items.copy_to(dst, n, real_src_off, dst_off)
	end
end

lib/core/text/flat.nit:37,1--406,3

core :: bytes $ FlatText

redef class FlatText
	# Appends the `byte_length` bytes of `self` to `b`
	#
	# The bytes are read from `items` starting at `first_byte` for a
	# `FlatString` and at 0 otherwise. Nothing is appended when `_items`
	# is not set.
	redef fun append_to_bytes(b)
	do
		var offset = 0
		if self isa FlatString then offset = first_byte
		if not isset _items then return
		b.append_ns_from(items, byte_length, offset)
	end
end

lib/core/bytes.nit:1015,1--1020,3

base64 :: base64 $ FlatText

redef class FlatText
	# Delegates to `CString::encode_base64` over the `byte_length` bytes
	# of `fast_cstring`, then converts the result with `to_s`.
	redef fun encode_base64
	do
		var encoded = fast_cstring.encode_base64(byte_length)
		return encoded.to_s
	end

	# Delegates to `CString::decode_base64` over the `byte_length` bytes
	# of `fast_cstring`.
	redef fun decode_base64
	do
		return fast_cstring.decode_base64(byte_length)
	end

	# Delegates to `CString::is_base64` over the `byte_length` bytes
	# of `fast_cstring`.
	redef fun is_base64
	do
		return fast_cstring.is_base64(byte_length)
	end

	# Delegates to `CString::check_base64` over the `byte_length` bytes
	# of `fast_cstring`.
	redef fun check_base64
	do
		return fast_cstring.check_base64(byte_length)
	end
end

lib/base64/base64.nit:242,1--250,3

text_stat :: text_stat $ FlatText

redef class FlatText
	# Variant of the byte lookup that also feeds `sys.index_len`
	#
	# `index` must be a valid character position of `self`. The lookup starts
	# from the closest anchor (first character, cached position or last
	# character) and the number of bytes walked from that anchor is
	# recorded with `sys.index_len.inc`. The cache (`position` and
	# `bytepos`) is then moved to `index`.
	redef fun char_to_byte_index(index) do
		var total = length
		assert index >= 0
		assert index < total

		# Distance from `index` to each possible anchor
		var from_start = index
		var from_end = (total - 1) - index
		var from_cache = (position - index).abs
		var best = from_start
		var bytes = items

		if from_cache < best then best = from_cache
		if from_end < best then best = from_end

		# On ties, the start wins over the cache, which wins over the end.
		var anchor_byte: Int
		var anchor_char: Int
		if best == from_start then
			anchor_byte = first_byte
			anchor_char = 0
		else if best == from_cache then
			anchor_byte = bytepos
			anchor_char = position
		else
			anchor_byte = bytes.find_beginning_of_char_at(last_byte)
			anchor_char = length - 1
		end

		var found = bytes.char_to_byte_index_cached(index, anchor_char, anchor_byte)

		# Statistics: how far the lookup had to travel, in bytes
		sys.index_len.inc((found - anchor_byte).abs)

		position = index
		bytepos = found

		return found
	end
end

lib/text_stat/text_stat.nit:170,1--213,3

json :: static $ FlatText

redef class FlatText
	# Is there a backslash (byte 0x5C) between `first_byte` and `last_byte`?
	redef fun json_need_escape
	do
		var bytes = items
		var stop = last_byte
		var i = first_byte
		while i <= stop do
			if bytes[i] == u'\\' then return true
			i += 1
		end
		return false
	end
end

lib/json/static.nit:127,1--135,3

abstract class FlatText

Summary

All kinds of array-based text representations.

Introduced properties

byte_length=

bytepos

bytepos=

char_to_byte_index

chars_to_escape_to_c

chars_to_html_escape

defaultinit

fast_cstring

fetch_char_at

first_byte

items

items=

last_byte

length=

position

position=

Redefined properties

SELF

[]

append_to_bytes

byte_length

char_to_byte_index

check_base64

copy_to_native

copy_to_native

decode_base64

encode_base64

escape_to_c

html_escape

is_base64

length

output

to_hex

All kinds of array-based text representations.

Introduced properties

protected fun byte_length=(byte_length: Int)

fun bytepos: Int

protected fun bytepos=(bytepos: Int)

fun char_to_byte_index(index: Int): Int

fun chars_to_escape_to_c: Int

fun chars_to_html_escape: Int

init defaultinit

abstract fun fast_cstring: CString

fun fetch_char_at(index: Int): Char

protected fun first_byte: Int

fun items: CString

protected fun items=(items: CString)

protected fun last_byte: Int

protected fun length=(length: Int)

fun position: Int

protected fun position=(position: Int)

Redefined properties

redef type SELF: FlatText

redef fun [](index: Int): Char

redef fun append_to_bytes(b: Bytes)

redef fun byte_length: Int

redef fun char_to_byte_index(index: Int): Int

redef fun check_base64: nullable Error

redef fun copy_to_native(dst: CString, n: Int, src_off: Int, dst_off: Int)

redef fun copy_to_native(dest: CString, n: Int, src_offset: Int, dest_offset: Int)

redef fun decode_base64: Bytes

redef fun encode_base64: String

redef fun escape_to_c: String

redef fun html_escape: String

redef fun is_base64: Bool

redef fun length: Int

redef fun output

redef fun to_hex(pos: nullable Int, ln: nullable Int): Int

Summary

All properties

!=

*

+

/

<

<=