Cleans a CString if necessary, replacing each invalid UTF-8 byte with U+FFFD

Property definitions

core :: flat $ CString :: clean_utf8
	# Builds a `FlatString` from the first `len` bytes of `self`, substituting
	# U+FFFD (bytes `EF BF BD`) for each byte rejected by the checks below.
	#
	# A byte is rejected when its high bit is set and either its bit pattern
	# does not match the lead byte of a sequence of the length reported by
	# `length_of_char_at`, or the code point returned by `char_at` lies outside
	# the range allowed for that length (for 3-byte sequences, surrogates,
	# U+FFFE and U+FFFF are refused as well).
	#
	# When nothing is rejected the result is built over `self` itself;
	# otherwise a new `CString` of `len + 2 * rejected` bytes is allocated and
	# filled. Each rejected byte counts as one character in the result.
	fun clean_utf8(len: Int): FlatString do
		# Byte offsets to patch; allocated on the first rejection only
		var bad_offsets: nullable Array[Int] = null
		# Byte size of the result: every patched byte grows from 1 to 3 bytes
		var out_bytes = len
		var cursor = 0
		var char_count = 0
		var left = len
		while left > 0 do
			# Fast path: skip 4 bytes at once while none has its high bit set
			# NOTE(review): assumes `fetch_4_chars` packs the 4 bytes found at
			# `cursor` into one 32-bit value -- confirm against its definition
			while left >= 4 do
				var word = fetch_4_chars(cursor)
				if word & 0x80808080u32 != 0u32 then break
				cursor += 4
				char_count += 4
				left -= 4
			end
			if left == 0 then break
			var lead = self[cursor]
			# High bit clear: single-byte character, nothing to validate
			if lead & 0x80 == 0x00 then
				cursor += 1
				char_count += 1
				left -= 1
				continue
			end
			# Check the lead byte pattern against the announced sequence length
			var seq_len = length_of_char_at(cursor)
			var lead_ok: Bool
			if seq_len == 1 then
				lead_ok = lead & 0x80 == 0
			else if seq_len == 2 then
				lead_ok = lead & 0xE0 == 0xC0
			else if seq_len == 3 then
				lead_ok = lead & 0xF0 == 0xE0
			else
				lead_ok = lead & 0xF8 == 0xF0
			end
			if lead_ok then
				# Decode only once the lead byte is accepted, then check that
				# the code point belongs to the range of that sequence length
				var decoded = char_at(cursor)
				var code = decoded.code_point
				var in_range: Bool
				if seq_len == 1 then
					in_range = code >= 0 and code <= 0x7F
				else if seq_len == 2 then
					in_range = code >= 0x80 and code <= 0x7FF
				else if seq_len == 3 then
					in_range = code >= 0x800 and code <= 0xFFFF and not (code >= 0xD800 and code <= 0xDFFF) and code != 0xFFFE and code != 0xFFFF
				else
					in_range = code >= 0x10000 and code <= 0x10FFFF
				end
				if in_range then
					# Accepted: step over the whole character
					var width = decoded.u8char_len
					cursor += width
					left -= width
					char_count += 1
					continue
				end
			end
			# Rejected (bad lead byte or out-of-range code point): remember
			# the offset and step over this single byte only
			if bad_offsets == null then bad_offsets = new Array[Int]
			bad_offsets.add cursor
			out_bytes += 2
			cursor += 1
			left -= 1
			char_count += 1
		end
		var result = self
		if out_bytes != len then
			# At least one byte was rejected: rebuild into a larger buffer
			result = new CString(out_bytes)
			var offsets = bad_offsets.as(not null)
			var raw = offsets.items.as(not null)
			var total = offsets.length
			var src = 0
			var dst = 0
			var k = 0
			while k < total do
				var bad = raw[k]
				# Copy the untouched bytes that precede the rejected one
				var span = bad - src
				copy_to(result, span, src, dst)
				dst += span
				# UTF-8 encoding of U+FFFD
				result[dst] = 0xEF
				result[dst + 1] = 0xBF
				result[dst + 2] = 0xBD
				dst += 3
				# Resume right after the rejected byte
				src = bad + 1
				k += 1
			end
			# Copy whatever follows the last rejected byte
			copy_to(result, len - src, src, dst)
		end
		return new FlatString.full(result, out_bytes, 0, char_count)
	end
lib/core/text/flat.nit:1345,2--1437,4