From 542a74f74924f1bd7c91689c95adf23f8270a798 Mon Sep 17 00:00:00 2001
From: Lucas Bajolet <r4pass@hotmail.com>
Date: Tue, 2 Jun 2015 14:49:19 -0400
Subject: [PATCH] lib/: removed UTF-8 related modules in anticipation of the
 integration in stdlib

Signed-off-by: Lucas Bajolet <r4pass@hotmail.com>
---
 lib/string_experimentations/README.md              |   24 -
 .../string_experimentations.nit                    |   18 -
 lib/string_experimentations/utf8.nit               |  427 -----------
 lib/string_experimentations/utf8_noindex.nit       |  742 --------------------
 tests/sav/utf_test.res                             |   11 -
 tests/utf_test.nit                                 |   42 --
 6 files changed, 1264 deletions(-)
 delete mode 100644 lib/string_experimentations/README.md
 delete mode 100644 lib/string_experimentations/string_experimentations.nit
 delete mode 100644 lib/string_experimentations/utf8.nit
 delete mode 100644 lib/string_experimentations/utf8_noindex.nit
 delete mode 100644 tests/sav/utf_test.res
 delete mode 100644 tests/utf_test.nit

diff --git a/lib/string_experimentations/README.md b/lib/string_experimentations/README.md
deleted file mode 100644
index 7cbce13..0000000
--- a/lib/string_experimentations/README.md
+++ /dev/null
@@ -1,24 +0,0 @@
-This project is a collection of modules used to experiment on different variations of Text and its subclasses.
-This is only temporary as these modules will eventually be merged into standard library or discarded for those bringing no real improvements to the language.
-
-The modules contained here are :
-
- * utf8: A draft of implementation of UTF-8 as internal encoding for Strings with automatic indexing.
- * utf8_no_index: Another draft of implementation of UTF-8, this time without indexing.
-
-TODO :
-
- * utf8:
-  * Support for the whole API of Text
-  * Any kind of normalization form for equality (NFC probably)
-  * Compatibility versions of equality test
-  * Locale support
-  * Comparisons
-  * to_upper/lower fully-compatible with Unicode
-
- * utf8_no_index:
-  * Add cache for the last indexed character - DONE
-  * Two-way iteration - DONE
-  * Intelligent indexed access (calculating the nearest point of insertion, i.e. begin, end, or cache) - DONE
-  * UnicodeChar as universal type
-  * UnicodeChar => Char and Char => Byte
diff --git a/lib/string_experimentations/string_experimentations.nit b/lib/string_experimentations/string_experimentations.nit
deleted file mode 100644
index 1eeb10d..0000000
--- a/lib/string_experimentations/string_experimentations.nit
+++ /dev/null
@@ -1,18 +0,0 @@
-# This file is part of NIT ( http://www.nitlanguage.org ).
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# General module for all kinds of string experimentations
-module string_experimentations
-
-import utf8
diff --git a/lib/string_experimentations/utf8.nit b/lib/string_experimentations/utf8.nit
deleted file mode 100644
index b3a2450..0000000
--- a/lib/string_experimentations/utf8.nit
+++ /dev/null
@@ -1,427 +0,0 @@
-# This file is part of NIT ( http://www.nitlanguage.org ).
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Introduces UTF-8 as internal encoding for Strings in Nit.
-module utf8
-
-intrude import standard::string
-intrude import standard::file
-
-in "C Header" `{
-
-#include <stdio.h>
-#include <string.h>
-#include <stdint.h>
-
-typedef struct {
-	long pos;
-	char* ns;
-} UTF8Char;
-
-`}
-
-# UTF-8 char as defined in RFC-3629, e.g. 1-4 Bytes
-#
-# A UTF-8 char has its bytes stored in a NativeString (char*)
-extern class UTF8Char `{ UTF8Char* `}
-
-	new(pos: Int, ns: NativeString) `{
-		UTF8Char* u = malloc(sizeof(UTF8Char));
-		u->pos = pos;
-		u->ns = ns;
-		return u;
-	`}
-
-	# Real length of the char in UTF8
-	#
-	# As per the specification :
-	#
-	# ~~~raw
-	#  Length  |        UTF-8 octet sequence
-	#          |              (binary)
-	# ---------+-------------------------------------------------
-	#  1       | 0xxxxxxx
-	#  2       | 110xxxxx 10xxxxxx
-	#  3       | 1110xxxx 10xxxxxx 10xxxxxx
-	#  4       | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
-	# ~~~
-	private fun len: Int `{
-		char* ns = self->ns;
-		int pos = self->pos;
-		char nspos = ns[pos];
-		if((nspos & 0x80) == 0x00){ return 1;}
-		if((nspos & 0xE0) == 0xC0){ return 2;}
-		if((nspos & 0xF0) == 0xE0){ return 3;}
-		if((nspos & 0xF7) == 0xF0){ return 4;}
-		// Invalid character
-		return 1;
-	`}
-
-	# Position in containing NativeString
-	private fun pos: Int `{
-		return self->pos;
-	`}
-
-	private fun pos=(p: Int) `{self->pos = p;`}
-
-	# C char* wrapping the char
-	fun ns: NativeString `{
-		return self->ns;
-	`}
-
-	# Returns the Unicode code point representing the character
-	#
-	# Note : A unicode character might not be a visible glyph, but it will be used to determine canonical equivalence
-	fun code_point: Int import UTF8Char.len `{
-		switch(UTF8Char_len(self)){
-			case 1:
-				return (long)(0x7F & (unsigned char)self->ns[self->pos]);
-			case 2:
-				return 0 | ((0x1F & (unsigned char)self->ns[self->pos]) << 6) | (0x3F & (unsigned char)self->ns[self->pos+1]);
-			case 3:
-				return 0 | ((0x0F & (unsigned char)self->ns[self->pos]) << 12) |
-				((0x3F & (unsigned char)self->ns[self->pos+1]) << 6) |
-				(0x3F & (unsigned char)self->ns[self->pos+2]);
-			case 4:
-				return 0 | ((0x07 & (unsigned char)self->ns[self->pos]) << 18) |
-				((0x3F & (unsigned char)self->ns[self->pos+1]) << 12) |
-				((0x3F & (unsigned char)self->ns[self->pos+2]) << 6) |
-				(0x3F & (unsigned char)self->ns[self->pos+3]);
-		}
-	`}
-
-	# Returns an upper-case version of self
-	#
-	# NOTE : Works only on ASCII chars
-	# TODO : Support unicode for to_upper
-	fun to_upper: UTF8Char import UTF8Char.code_point `{
-		int cp = UTF8Char_code_point(self);
-		if(cp < 97 || cp > 122){ return self; }
-		char* ns = malloc(2);
-		ns[1] = '\0';
-		char c = self->ns[self->pos];
-		ns[0] = c - 32;
-		UTF8Char* ret = malloc(sizeof(UTF8Char));
-		ret->ns = ns;
-		ret->pos = 0;
-		return ret;
-	`}
-
-	# Returns an lower-case version of self
-	#
-	# NOTE : Works only on ASCII chars
-	# TODO : Support unicode for to_upper
-	fun to_lower: UTF8Char import UTF8Char.code_point `{
-		int cp = UTF8Char_code_point(self);
-		if(cp < 65 || cp > 90){ return self; }
-		char* ns = malloc(2);
-		ns[1] = '\0';
-		char c = self->ns[self->pos];
-		ns[0] = c + 32;
-		UTF8Char* ret = malloc(sizeof(UTF8Char));
-		ret->ns = ns;
-		ret->pos = 0;
-		return ret;
-	`}
-
-	redef fun ==(o)
-	do
-		if o isa Char then
-			if len != 1 then return false
-			if code_point == o.ascii then return true
-		else if o isa UTF8Char then
-			if len != o.len then return false
-			if code_point == o.code_point then return true
-		end
-		return false
-	end
-
-	redef fun output import UTF8Char.code_point `{
-		switch(UTF8Char_len(self)){
-			case 1:
-				printf("%c", self->ns[self->pos]);
-				break;
-			case 2:
-				printf("%c%c", self->ns[self->pos], self->ns[self->pos + 1]);
-				break;
-			case 3:
-				printf("%c%c%c", self->ns[self->pos], self->ns[self->pos + 1], self->ns[self->pos + 2]);
-				break;
-			case 4:
-				printf("%c%c%c%c", self->ns[self->pos], self->ns[self->pos + 1], self->ns[self->pos + 2], self->ns[self->pos + 3]);
-				break;
-		}
-	`}
-
-	redef fun to_s import NativeString.to_s_with_length `{
-		int len = utf8___UTF8Char_len___impl(self);
-		char* r = malloc(len + 1);
-		r[len] = '\0';
-		char* src = (self->ns + self->pos);
-		memcpy(r, src, len);
-		return NativeString_to_s_with_length(r, len);
-	`}
-end
-
-# A `StringIndex` is used to keep track of the position of characters in a `FlatString` object
-#
-# It becomes mandatory for UTF-8 strings since characters do not have a fixed size.
-private extern class StringIndex `{ UTF8Char* `}
-
-	new(size: Int) `{ return malloc(size*sizeof(UTF8Char)); `}
-
-	# Sets the character at `index` as `item`
-	fun []=(index: Int, item: UTF8Char) `{ self[index] = *item; `}
-
-	# Gets the character at position `id`
-	fun [](id: Int): UTF8Char `{ return &self[id]; `}
-
-	# Copies a part of self starting at index `my_from` of length `length` into `other`, starting at `its_from`
-	fun copy_to(other: StringIndex, my_from: Int, its_from: Int, length: Int)`{
-		UTF8Char* myfrom = self + my_from*(sizeof(UTF8Char));
-		UTF8Char* itsfrom = other + its_from*(sizeof(UTF8Char));
-		memcpy(itsfrom, myfrom, length);
-	`}
-end
-
-redef class FlatString
-
-	# Index of the characters of the FlatString
-	private var index: StringIndex
-
-	# Length in bytes of the string (e.g. the length of the C string)
-	var bytelen: Int
-
-	private init with_infos_index(items: NativeString, len: Int, index_from: Int, index_to: Int, index: StringIndex, bytelen: Int)
-	do
-		self.items = items
-		length = len
-		self.index_from = index_from
-		self.index_to = index_to
-		self.index = index
-		self.bytelen = bytelen
-	end
-
-	redef fun to_cstring
-	do
-		if real_items != null then return real_items.as(not null)
-		var new_items = new NativeString(bytelen + 1)
-		self.items.copy_to(new_items, bytelen, index[index_from].pos, 0)
-		new_items[bytelen] = '\0'
-		self.real_items = new_items
-		return new_items
-	end
-
-	redef fun substring(from, count)
-	do
-		assert count >= 0
-
-		if from < 0 then
-			count += from
-			if count < 0 then count = 0
-			from = 0
-		end
-
-		if count == 0 then return empty
-
-		var real_from = index_from + from
-		var real_to = real_from + count - 1
-
-		if real_to > index_to then real_to = index_to
-
-		var sub_bytelen = (index[real_to].pos - index[from].pos) + index[from].len
-
-		return new FlatString.with_infos_index(items, count, real_from, real_to, index, sub_bytelen)
-	end
-
-	redef fun reversed
-	do
-		var native = new NativeString(self.bytelen + 1)
-		var length = self.length
-		var index = self.index
-		var pos = 0
-		var i = 0
-		var ipos = bytelen
-		var new_index = new StringIndex(length)
-		var pos_index = length
-		while i < length do
-			var uchar = index[i]
-			var uchar_len = uchar.len
-			ipos -= uchar_len
-			new_index[pos_index] = new UTF8Char(ipos, native)
-			pos_index -= 1
-			items.copy_to(native, uchar_len, pos, ipos)
-			pos += uchar_len
-			i += 1
-		end
-		return new FlatString.with_infos_index(native, length, 0, length-1, new_index, bytelen)
-	end
-
-	redef fun *(i)
-	do
-		assert i >= 0
-
-		var mylen = self.bytelen
-		var finlen = mylen * i
-
-		var my_items = self.items
-
-		var my_real_len = length
-		var my_real_fin_len = my_real_len * i
-
-		var target_string = new NativeString((finlen) + 1)
-
-		var my_index = index
-		var new_index = new StringIndex(my_real_fin_len)
-
-		target_string[finlen] = '\0'
-
-		var current_last = 0
-		var curr_index = 0
-
-		for iteration in [1 .. i] do
-			my_items.copy_to(target_string, mylen, index_from, current_last)
-			my_index.copy_to(new_index, length, 0, curr_index)
-			current_last += mylen
-		end
-
-		return new FlatString.with_infos_index(target_string, my_real_fin_len, 0, my_real_fin_len -1, new_index, finlen)
-
-	end
-
-	redef fun to_upper
-	do
-		var outstr = new NativeString(self.bytelen + 1)
-
-		var out_index = 0
-		var index = self.index
-		var ipos = 0
-		var max = length
-
-		while ipos < max do
-			var u = index[ipos].to_upper
-			u.ns.copy_to(outstr, u.len, u.pos, out_index)
-			out_index += u.len
-			ipos += 1
-		end
-
-		outstr[self.bytelen] = '\0'
-
-		return outstr.to_s_with_length(self.bytelen)
-	end
-
-	redef fun to_lower
-	do
-		var outstr = new NativeString(self.bytelen + 1)
-
-		var out_index = 0
-		var index = self.index
-		var ipos = 0
-		var max = length
-
-		while ipos < max do
-			var u = index[ipos].to_lower
-			u.ns.copy_to(outstr, u.len, u.pos, out_index)
-			out_index += u.len
-			ipos += 1
-		end
-
-		outstr[self.bytelen] = '\0'
-
-		return outstr.to_s_with_length(self.bytelen)
-	end
-
-	redef fun output
-	do
-		var i = self.index_from
-		var imax = self.index_to
-		while i <= imax do
-			index[i].output
-			i += 1
-		end
-	end
-
-end
-
-redef class FlatBuffer
-
-	# Fix for this particular implementation
-	#
-	# Since the to_s of a FlatBuffer now builds using
-	# the old String contructor, this breaks everything.
-	#
-	# This will disappear when UTF8 is fully-supported
-	redef fun to_s do
-		written = false
-		return to_cstring.to_s_with_length(length)
-	end
-end
-
-redef class NativeString
-
-	# Creates the index for said NativeString
-	# `length` is the size of the CString (in bytes, up to the first \0)
-	# real_len is just a way to store the length (UTF-8 characters)
-	private fun make_index(length: Int, real_len: Container[Int]): StringIndex import Container[Int].item=, UTF8Char.len `{
-		int pos = 0;
-		int index_pos = 0;
-		UTF8Char* index = malloc(length*sizeof(UTF8Char));
-		while(pos < length){
-			UTF8Char* curr = &index[index_pos];
-			curr->pos = pos;
-			curr->ns = self;
-			pos += UTF8Char_len(curr);
-			index_pos ++;
-		}
-		Container_of_Int_item__assign(real_len, index_pos);
-		return index;
-	`}
-
-	redef fun to_s: FlatString
-	do
-		var len = cstring_length
-		return to_s_with_length(len)
-	end
-
-	redef fun to_s_with_length(len)
-	do
-		var real_len = new Container[Int](0)
-		var x = make_index(len, real_len)
-		return new FlatString.with_infos_index(self, real_len.item, 0, real_len.item - 1, x, len)
-	end
-
-	redef fun to_s_with_copy
-	do
-		var real_len = new Container[Int](0)
-		var length = cstring_length
-		var x = make_index(length, real_len)
-		var new_self = new NativeString(length + 1)
-		copy_to(new_self, length, 0, 0)
-		return new FlatString.with_infos_index(new_self, real_len.item, 0, real_len.item - 1, x, length)
-	end
-end
-
-redef class FileWriter
-	redef fun write(s)
-	do
-		assert is_writable
-		if s isa FlatText then
-			if s isa FlatString then
-				write_native(s.to_cstring, s.bytelen)
-			else
-				write_native(s.to_cstring, s.length)
-			end
-		else for i in s.substrings do write_native(i.to_cstring, i.length)
-	end
-end
diff --git a/lib/string_experimentations/utf8_noindex.nit b/lib/string_experimentations/utf8_noindex.nit
deleted file mode 100644
index 8756838..0000000
--- a/lib/string_experimentations/utf8_noindex.nit
+++ /dev/null
@@ -1,742 +0,0 @@
-# This file is part of NIT ( http://www.nitlanguage.org ).
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Introduces UTF-8 as internal encoding for Strings in Nit.
-module utf8_noindex
-
-intrude import standard::string
-intrude import standard::file
-
-in "C Header" `{
-
-#include <stdio.h>
-#include <string.h>
-#include <stdint.h>
-
-#define IS_BIG_ENDIAN (*(uint16_t *)"\0\xff" < 0x100)
-
-`}
-
-# UTF-8 char as defined in RFC-3629, e.g. 1-4 Bytes
-extern class UnicodeChar `{ uint32_t* `}
-	super Comparable
-
-	redef type OTHER: UnicodeChar
-
-	# Transforms a byte-variable char* character to its uint32_t equivalent
-	new from_ns(ns: NativeString, index: Int) `{
-		unsigned char* ret = calloc(1,4);
-		if((ns[index] & 0x80) == 0){ memcpy(ret + 3, ns + index, 1);  }
-		else if((ns[index] & 0xE0) == 0xC0) { memcpy(ret + 2, ns + index, 2); }
-		else if((ns[index] & 0xF0) == 0xE0) { memcpy(ret + 1, ns + index, 3); }
-		else if((ns[index] & 0xF7) == 0xF0) { memcpy(ret, ns + index, 4); }
-		else{ memcpy(ret + 3, ns + index, 1);}
-		if (!IS_BIG_ENDIAN) {
-			uint32_t tmp = ntohl(*((uint32_t*)ret));
-			memcpy(ret, &tmp, 4);
-		}
-		return (uint32_t*)ret;
-	`}
-
-	# Real length of the char in UTF8
-	#
-	# As per the specification :
-	#
-	# ~~~raw
-	#  Length  |        UTF-8 octet sequence
-	#          |              (binary)
-	# ---------+-------------------------------------------------
-	#  1       | 0xxxxxxx
-	#  2       | 110xxxxx 10xxxxxx
-	#  3       | 1110xxxx 10xxxxxx 10xxxxxx
-	#  4       | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
-	# ~~~
-	fun len: Int `{
-		uint32_t s = *self;
-		if(s <= 127) {return 1;}
-		if(s >= 49280 && s <= 57279) {return 2;}
-		if(s >= 14712960 && s <= 15712191) {return 3;}
-		if(s >= 4034953344 && s <= 4156538815) { return 4; }
-		// Bad character
-		return 1;
-	`}
-
-	# Returns the Unicode code point representing the character
-	#
-	# Note : A unicode character might not be a visible glyph, but it will be used to determine canonical equivalence
-	fun code_point: Int import UnicodeChar.len `{
-		uint32_t val = *self;
-		uint32_t ret = 0;
-		switch(UnicodeChar_len(self)){
-			case 1:
-				ret = *self;
-				break;
-			case 2:
-				ret = 0 | ((val & 0x00001F00) >> 2) | (val & 0x0000003F);
-				break;
-			case 3:
-				ret = 0 | ((val & 0x000F0000) >> 4) | ((val & 0x00003F00) >> 2) | (val & 0x0000003F);
-				break;
-			case 4:
-				ret = 0 | ((val & 0x07000000) >> 6) | ((val & 0x003F0000) >> 4) | ((val & 0x00003F00) >> 2) | (val & 0x0000003F);
-				break;
-		}
-		unsigned char* rt = (unsigned char*) &ret;
-		return ret;
-	`}
-
-	# Warning : This does not follow the Unicode specification for now
-	#
-	# TODO: Support Unicode-compliant comparison
-	redef fun <(o) do return self.code_point < o.code_point
-
-	# Returns an upper-case version of self
-	#
-	# NOTE : Works only on ASCII chars
-	# TODO : Support unicode for to_upper
-	fun to_upper: UnicodeChar import UnicodeChar.code_point `{
-		if(*self < 97 || *self > 122){ return self; }
-		uint32_t* ret = calloc(1,4);
-		*ret = *self - 32;
-		return ret;
-	`}
-
-	# Returns an lower-case version of self
-	#
-	# NOTE : Works only on ASCII chars
-	# TODO : Support unicode for to_upper
-	fun to_lower: UnicodeChar import UnicodeChar.code_point `{
-		if(*self < 65 || *self > 90){ return self; }
-		uint32_t* ret = calloc(1,4);
-		*ret = *self + 32;
-		return ret;
-	`}
-
-	redef fun ==(o)
-	do
-		if not o isa UnicodeChar then return false
-		if o.code_point == self.code_point then return true
-		return false
-	end
-
-	redef fun output import UnicodeChar.len `{
-		uint32_t self0 = *self;
-		if(!IS_BIG_ENDIAN){
-			uint32_t tmp = ntohl(self0);
-			memcpy(&self0, &tmp, 4);
-		}
-		unsigned char* s = (unsigned char*) &self0;
-		switch(UnicodeChar_len(self0)){
-			case 1:
-				printf("%c", s[3]);
-				break;
-			case 2:
-				printf("%c%c", s[2], s[3]);
-				break;
-			case 3:
-				printf("%c%c%c", s[1], s[2], s[3]);
-				break;
-			case 4:
-				printf("%c%c%c%c", s[0], s[1], s[2], s[3]);
-				break;
-		}
-	`}
-
-	redef fun to_s: FlatString import FlatString.full, UnicodeChar.len `{
-		int len = UnicodeChar_len(self);
-		char* r = malloc(len + 1);
-		r[len] = '\0';
-		uint32_t src = *self;
-		if(!IS_BIG_ENDIAN){
-			uint32_t tmp = htonl(src);
-			memcpy(&src, &tmp, 4);
-		}
-		unsigned char* s = (unsigned char*) &src;
-		switch(len){
-			case 1: memcpy(r, s+3, 1); break;
-			case 2: memcpy(r, s+2, 2); break;
-			case 3: memcpy(r, s+1, 3); break;
-			case 4: memcpy(r, s, 4); break;
-		}
-		return new_FlatString_full(r, 0, len - 1, len, 1);
-	`}
-end
-
-# Used to keep track of the last accessed char in a String
-class CharCache
-	# The position (as in char) of a String
-	var position: Int
-	# The position in the NativeString underlying the String
-	var bytepos: Int
-end
-
-class FlatStringReviter
-	super IndexedIterator[UnicodeChar]
-
-	# The NativeString to iterate upon
-	private var ns: NativeString
-
-	# The position in the string
-	private var pos: Int
-
-	# The position in the native string
-	private var bytepos: Int
-
-	init(s: FlatString) do from(s, s.length - 1)
-
-	init from(s: FlatString, position: Int)
-	do
-		ns = s.items
-		pos = position
-		bytepos = s.byte_index(position)
-	end
-
-	redef fun next
-	do
-		bytepos -= 1
-		while ns[bytepos].ascii.bin_and(0xC0) == 0x80 do
-			bytepos -= 1
-		end
-		pos -= 1
-	end
-
-	redef fun index do return pos
-
-	redef fun item do return new UnicodeChar.from_ns(ns, bytepos)
-
-	redef fun is_ok do return pos >= 0
-end
-
-class FlatStringIter
-	super IndexedIterator[UnicodeChar]
-
-	private var ns: NativeString
-
-	private var pos: Int
-
-	private var bytepos: Int
-
-	private var slen: Int
-
-	private var it: UnicodeChar
-
-	private var is_created = false
-
-	init(s: FlatString) do from(s, 0)
-
-	init from(s: FlatString, position: Int) do
-		ns = s.items
-		pos = position
-		bytepos = s.byte_index(position)
-		slen = s.length
-	end
-
-	redef fun index do return pos
-
-	redef fun is_ok do return pos < slen
-
-	redef fun item do
-		if not is_created then
-			it = new UnicodeChar.from_ns(ns, bytepos)
-			is_created = true
-		end
-		return it
-	end
-
-	redef fun next
-	do
-		if not is_created then
-			it = new UnicodeChar.from_ns(ns, bytepos)
-		end
-		is_created = false
-		var pace = it.len
-		pos += 1
-		bytepos += pace
-	end
-end
-
-redef class FlatString
-
-	redef type OTHER: FlatString
-
-	# Length in bytes of the string (e.g. the length of the C string)
-	redef var bytelen
-
-	# Cache for the last accessed character in the char
-	var cache = new CharCache(-1,-1)
-
-	redef var length = length_l is lazy
-
-	private init full(items: NativeString, from, to, bytelen, len: Int)
-	do
-		self.items = items
-		index_from = from
-		index_to = to
-		self.bytelen = bytelen
-		length = len
-	end
-
-	# Length implementation
-	private fun length_l: Int import FlatString.items, FlatString.index_to, FlatString.index_from `{
-		char* ns = FlatString_items(self);
-		int i = FlatString_index_from(self);
-		int max = FlatString_index_to(self);
-		int length = 0;
-		while(i <= max){
-			char c = ns[i];
-			if((c & 0x80) == 0) { i+= 1; }
-			else if((c & 0xE0) == 0xC0) { i += 2; }
-			else if((c & 0xF0) == 0xE0) { i += 3; }
-			else if((c & 0xF7) == 0xF0) { i += 4; }
-			else { i += 1; }
-			length ++;
-		}
-		return length;
-	`}
-
-	redef fun <(o)
-	do
-		var o_pos = 0
-		var olen = o.length
-		for i in [0 .. length[ do
-			if o_pos >= olen then return false
-			if char_at(i) > o.char_at(i) then return false
-			if char_at(i) < o.char_at(i) then return true
-		end
-		return false
-	end
-
-	redef fun ==(o) do
-		if o == null then return false
-		if not o isa FlatString then return super
-		var mylen = length
-		var itslen = o.length
-		if mylen != itslen then return false
-		var mypos = 0
-		var itspos = 0
-
-		while mypos < mylen do
-			if char_at(mypos) != o.char_at(itspos) then return false
-			mypos += 1
-			itspos += 1
-		end
-		return true
-	end
-
-	private fun byte_index(index: Int): Int do
-		assert index >= 0
-		assert index < length
-
-		# Find best insertion point
-		var delta_begin = index
-		var delta_end = (length - 1) - index
-		var delta_cache = (cache.position - index).abs
-		var min = delta_begin
-
-		if delta_cache < min then min = delta_cache
-		if delta_end < min then min = delta_end
-
-		var ns_i: Int
-		var my_i: Int
-		var myits = items
-
-		if min == delta_begin then
-			ns_i = index_from
-			my_i = 0
-		else if min == delta_cache then
-			ns_i = cache.bytepos
-			my_i = cache.position
-		else
-			ns_i = index_to
-			my_i = length
-		end
-
-		while my_i < index do
-			if myits[ns_i].ascii.bin_and(0x80) == 0 then
-				ns_i += 1
-			else if myits[ns_i].ascii.bin_and(0xE0) == 0xC0 then
-				ns_i += 2
-			else if myits[ns_i].ascii.bin_and(0xF0) == 0xE0 then
-				ns_i += 3
-			else if myits[ns_i].ascii.bin_and(0xF7) == 0xF0 then
-				ns_i += 4
-			else
-				ns_i += 1
-			end
-			my_i += 1
-		end
-
-		while my_i > index do
-			if myits[ns_i].ascii.bin_and(0xC0) != 0x80 then
-				my_i -= 1
-				if my_i == index then break
-			end
-			ns_i -= 1
-		end
-
-		cache.position = index
-		cache.bytepos = ns_i
-
-		return ns_i
-	end
-
-	fun char_at(pos: Int): UnicodeChar do
-		return new UnicodeChar.from_ns(items, byte_index(pos))
-	end
-
-	private init with_bytelen(items: NativeString, index_from: Int, index_to: Int, bytelen: Int) do
-		self.items = items
-		self.index_from = index_from
-		self.index_to = index_to
-		self.bytelen = bytelen
-	end
-
-	redef fun reversed do
-		var new_str = new NativeString(bytelen)
-		var s_pos = bytelen
-		var my_pos = index_from
-		var its = items
-		for i in [0..length[ do
-			var c = char_at(i).len
-			s_pos -= c
-			its.copy_to(new_str, c, my_pos, s_pos)
-			my_pos += c
-		end
-		return new FlatString.full(new_str, 0, bytelen - 1, bytelen, length)
-	end
-
-	redef fun to_upper do
-		var ns = new NativeString(bytelen)
-		var offset = 0
-		for i in [0 .. length[
-		do
-			var c = char_at(i)
-			c.to_upper.to_s.items.copy_to(ns, c.len, 0, offset)
-			offset += c.len
-		end
-		return new FlatString.full(ns, 0, bytelen - 1, bytelen, length)
-	end
-
-	redef fun to_lower do
-		var ns = new NativeString(bytelen)
-		var offset = 0
-		for i in [0 .. length[
-		do
-			var c = char_at(i)
-			c.to_lower.to_s.items.copy_to(ns, c.len, 0, offset)
-			offset += c.len
-		end
-		return new FlatString.full(ns, 0, bytelen - 1, bytelen, length)
-	end
-
-	redef fun +(o) do
-		if o isa Buffer then o = o.to_s
-		if o isa FlatString then
-			var new_str = new NativeString(bytelen + o.bytelen + 1)
-			var new_bytelen = bytelen + o.bytelen
-			new_str[new_bytelen] = '\0'
-			var newlen = length + o.length
-			items.copy_to(new_str, bytelen, index_from, 0)
-			o.items.copy_to(new_str, o.bytelen, o.index_from, bytelen)
-			return new FlatString.full(new_str, 0, new_bytelen - 1, new_bytelen, newlen)
-		else if o isa Concat then
-			return new Concat(self, o)
-		else
-			# If it goes to this point, that means another String implementation was concerned, therefore you need to support the + operation for this variant
-			abort
-		end
-	end
-
-	redef fun *(i) do
-		var mybtlen = bytelen
-		var new_bytelen = mybtlen * i
-		var mylen = length
-		var newlen = mylen * i
-		var ns = new NativeString(new_bytelen + 1)
-		ns[new_bytelen] = '\0'
-		var offset = 0
-		while i > 0 do
-			items.copy_to(ns, bytelen, index_from, offset)
-			offset += mybtlen
-			i -= 1
-		end
-		return new FlatString.full(ns, 0, new_bytelen - 1, new_bytelen, newlen)
-	end
-
-	# O(n)
-	redef fun substring(from, count) do
-		assert count >= 0
-
-		if from < 0 then
-			count += from
-			if count < 0 then count = 0
-			from = 0
-		end
-
-		if count == 0 then return empty
-
-		var real_from = byte_index(from)
-
-		var lst = from + count - 1
-
-		if lst > length - from then
-			return new FlatString.with_bytelen(items, real_from, index_to, index_to - real_from)
-		end
-
-		var real_to = byte_index(lst)
-
-		return new FlatString.full(items, real_from, real_to, (real_to + char_at(lst).len) - real_from, count)
-	end
-
-	redef fun to_cstring do
-		if real_items != null then return real_items.as(not null)
-		var new_items = new NativeString(bytelen + 1)
-		self.items.copy_to(new_items, bytelen, index_from, 0)
-		new_items[bytelen] = '\0'
-		self.real_items = new_items
-		return new_items
-	end
-end
-
-redef class Text
-
-	# Length of the string, in bytes
-	fun bytelen: Int is abstract
-
-end
-
-redef class FlatBuffer
-
-	redef var bytelen
-
-	redef init from(s) do
-		if s isa Concat then
-			with_capacity(50)
-			for i in s.substrings do self.append(i)
-		end
-		items = new NativeString(s.bytelen)
-		if s isa FlatString then
-			s.items.copy_to(items, s.bytelen, s.index_from, 0)
-		else
-			s.as(FlatBuffer).items.copy_to(items, s.as(FlatBuffer).bytelen, 0, 0)
-		end
-		length = s.length
-		bytelen = s.bytelen
-		capacity = s.bytelen
-	end
-
-	# Replaces the char at `index` by `item`
-	fun char_at=(index: Int, item: UnicodeChar) do
-		is_dirty = true
-		if index == length then
-			add_unicode item
-			return
-		end
-		assert index >= 0 and index < length
-		var ip = byte_at(index)
-		var c = char_at_byte(ip)
-		var size_diff = item.len - c.len
-		if size_diff > 0 then
-			rshift_bytes(ip + c.len, size_diff)
-		else if size_diff < 0 then
-			lshift_bytes(ip + c.len, -size_diff)
-		end
-		var s = item.to_s
-		s.items.copy_to(items, s.bytelen, 0, ip)
-	end
-
-	# Shifts the content of the buffer by `len` bytes to the right, starting at byte `from`
-	fun rshift_bytes(from: Int, len: Int) import FlatBuffer.bytelen, FlatBuffer.bytelen=, FlatBuffer.items `{
-		long bt = FlatBuffer_bytelen(self);
-		char* ns = FlatBuffer_items(self);
-		int off = from + len;
-		memmove(ns + off, ns + from, bt - from);
-		FlatBuffer_bytelen__assign(self, bt + len);
-	`}
-
-	# Shifts the content of the buffer by `len` bytes to the left, starting at `from`
-	fun lshift_bytes(from: Int, len: Int) import FlatBuffer.bytelen, FlatBuffer.bytelen=, FlatBuffer.items `{
-		long bt = FlatBuffer_bytelen(self);
-		char* ns = FlatBuffer_items(self);
-		int off = from - len;
-		memmove(ns + off, ns + from, bt - from);
-		FlatBuffer_bytelen__assign(self, bt - len);
-	`}
-
-	# Get the Unicode char stored at `index` in `self`
-	fun char_at(index: Int): UnicodeChar do return new UnicodeChar.from_ns(items, byte_at(index))
-
-	# Get the Unicode char stored at `index` (bytewise) in `self`
-	fun char_at_byte(index: Int): UnicodeChar do return new UnicodeChar.from_ns(items, index)
-
-	# Add equivalent that supports Unicode
-	fun add_unicode(c: UnicodeChar) do
-		var s = c.to_s
-		if s.bytelen + bytelen > capacity then enlarge(s.bytelen)
-		s.items.copy_to(items, s.bytelen, 0, bytelen)
-	end
-
-	# Gets the byte index (in NativeString) of the char stored at `i`
-	fun byte_at(i: Int): Int do
-		assert i < length and i >= 0
-		var ns_i = 0
-		var real_i = 0
-		while real_i < i do
-			if items[ns_i].ascii.bin_and(0x80) == 0 then
-				ns_i += 1
-			else if items[ns_i].ascii.bin_and(0xE0) == 0xC0 then
-				ns_i += 2
-			else if items[ns_i].ascii.bin_and(0xF0) == 0xE0 then
-				ns_i += 3
-			else if items[ns_i].ascii.bin_and(0xF7) == 0xF0 then
-				ns_i += 4
-			else
-				ns_i += 1
-			end
-			real_i += 1
-		end
-		return ns_i
-	end
-
-	redef fun enlarge(cap) do
-		var c = capacity
-		if cap <= c then return
-		while c <= cap do c = c * 2 + 2
-		var a = new NativeString(c+1)
-		if bytelen > 0 then items.copy_to(a, bytelen, 0, 0)
-		items = a
-		capacity = c
-	end
-
-	redef fun append(s) do
-		if s isa Concat then
-			for i in s.substrings do append i
-		end
-		var i = s.as(FlatString)
-		var blen = bytelen
-		var iblen = i.bytelen
-		var newlen = blen + iblen
-		if newlen > capacity then
-			enlarge(newlen)
-		end
-		i.items.copy_to(items, iblen, i.index_from, blen)
-		bytelen += iblen
-		length += i.length
-	end
-
-	redef fun reverse
-	do
-		var nns = new NativeString(bytelen)
-		var ns = items
-		var btlen = bytelen
-		var myp = 0
-		var itsp = btlen
-		while myp < btlen do
-			var c = char_at_byte(myp).len
-			itsp -= c
-			ns.copy_to(nns, c, myp, itsp)
-			myp += c
-		end
-		items = nns
-	end
-
-	redef fun clear do
-		length = 0
-		bytelen = 0
-	end
-
-	redef fun copy(s, l, d, ns) do
-		if not d isa FlatBuffer then
-			# This implementation here is only concerned by the FlatBuffer
-			# If you implement a new Buffer subclass, make sure to support this operation via refinement.
-			abort
-		end
-		var rs = byte_at(s)
-		var re = byte_at(s + l - 1)
-		var rl = re - rs
-		var rns = d.byte_at(ns)
-		items.copy_to(d.items, rl, rns, rs)
-	end
-
-	redef fun times(i) do
-		var len = bytelen
-		var off = len
-		var newlen = len * i
-		if newlen > capacity then enlarge(newlen)
-		for j in [1 .. i[ do
-			items.copy_to(items, len, 0, off)
-			off += len
-		end
-		bytelen = newlen
-		length = length * i
-	end
-
-	redef fun upper do
-		for i in [0 .. length[ do
-			var pos = byte_at(i)
-			var c = char_at_byte(pos)
-			var d = c.to_upper
-			if c == d then continue
-			d.to_s.items.copy_to(items, 1, 0, pos)
-		end
-	end
-
-	redef fun lower do
-		for i in [0 .. length[ do
-			var pos = byte_at(i)
-			var c = char_at_byte(pos)
-			var d = c.to_lower
-			if c == d then continue
-			d.to_s.items.copy_to(items, 1, 0, pos)
-		end
-	end
-
-	redef fun to_cstring do
-		var ns = new NativeString(bytelen)
-		items.copy_to(ns, bytelen, 0, 0)
-		return ns
-	end
-end
-
-redef class NativeString
-
-	redef fun to_s: FlatString
-	do
-		var len = cstring_length
-		return to_s_with_length(len)
-	end
-
-	redef fun to_s_with_length(len)
-	do
-		return new FlatString.with_bytelen(self, 0, len - 1, len)
-	end
-
-	redef fun to_s_with_copy
-	do
-		var length = cstring_length
-		var new_self = new NativeString(length + 1)
-		copy_to(new_self, length, 0, 0)
-		return new FlatString.with_bytelen(new_self, 0, length - 1, length)
-	end
-end
-
-redef class FileWriter
-	redef fun write(s)
-	do
-		assert is_writable
-		if s isa FlatText then
-			write_native(s.to_cstring, s.bytelen)
-		else for i in s.substrings do write_native(i.to_cstring, i.length)
-	end
-end
diff --git a/tests/sav/utf_test.res b/tests/sav/utf_test.res
deleted file mode 100644
index 4055d93..0000000
--- a/tests/sav/utf_test.res
+++ /dev/null
@@ -1,11 +0,0 @@
-28
-ãã§aèªAæ¬æ¥a ð,A á . á çä¸aðã¼Ðã­aã
-ãaã­Ðã¼ðaä¸ç á . á A,ð aæ¥æ¬Aèªaã§ã
-ー𐍃a世
-30fc
-10343
-61
-4e16
-ãAã­Ðã¼ðAä¸ç á . á A,ð Aæ¥æ¬AèªAã§ã
-ãaã­Ðã¼ðaä¸ç á . á a,ð aæ¥æ¬aèªaã§ã
-ãaã­Ðã¼ðaä¸ç á . á A,ð aæ¥æ¬Aèªaã§ããaã­Ðã¼ðaä¸ç á . á A,ð aæ¥æ¬Aèªaã§ã
diff --git a/tests/utf_test.nit b/tests/utf_test.nit
deleted file mode 100644
index 88474c4..0000000
--- a/tests/utf_test.nit
+++ /dev/null
@@ -1,42 +0,0 @@
-# This file is part of NIT ( http://www.nitlanguage.org ).
-#
-# This file is free software, which comes along with NIT.  This software is
-# distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
-# without  even  the implied warranty of  MERCHANTABILITY or  FITNESS FOR A
-# PARTICULAR PURPOSE.  You can modify it is you want,  provided this header
-# is kept unaltered, and a notification of the changes is added.
-# You  are  allowed  to  redistribute it and sell it, alone or is a part of
-# another product.
-
-import standard
-intrude import string_experimentations::utf8
-
-var s = "aàハ𐍆".as(FlatString)
-assert s.index[0].code_point == 97
-assert s.index[1].code_point == 224
-assert s.index[2].code_point == 12495
-assert s.index[3].code_point == 66374
-
-var str = "ãaã­Ðã¼ðaä¸ç á . á A,ð aæ¥æ¬Aèªaã§ã".as(FlatString)
-
-print str.length
-
-print str.reversed
-
-str.output
-
-print ""
-
-var x = str.substring(4,4).as(FlatString)
-
-print x
-
-for i in [0..x.length[ do
-	print x.index[i + x.index_from].code_point.to_hex
-end
-
-print str.to_upper
-
-print str.to_lower
-
-print str * 2
-- 
1.7.9.5