X-Git-Url: http://nitlanguage.org

diff --git a/lib/core/text/flat.nit b/lib/core/text/flat.nit
index 200fdcf..6273609 100644
--- a/lib/core/text/flat.nit
+++ b/lib/core/text/flat.nit
@@ -55,7 +55,11 @@ redef class FlatText
 		var its = _items
 
 		if dpos == 1 then
-			b += _items.length_of_char_at(b)
+			if its[b] & 0x80u8 == 0x00u8 then
+				b += 1
+			else
+				b += its.length_of_char_at(b)
+			end
 			_bytepos = b
 			_position = index
 			return b
@@ -221,6 +225,22 @@ redef class FlatText
 				req_esc += 1
 			else if c == 0x5Cu8 then
 				req_esc += 1
+			else if c == 0x3Fu8 then
+				var j = pos + 1
+				if j < length then
+					var next = its[j]
+					# We ignore `??'` because it will be escaped as `??\'`.
+					if
+						next == 0x21u8 or
+						next == 0x28u8 or
+						next == 0x29u8 or
+						next == 0x2Du8 or
+						next == 0x2Fu8 or
+						next == 0x3Cu8 or
+						next == 0x3Du8 or
+						next == 0x3Eu8
+					then req_esc += 1
+				end
 			else if c < 32u8 then
 				req_esc += 3
 			end
@@ -276,6 +296,27 @@ redef class FlatText
 				nns[opos] = 0x5Cu8
 				nns[opos + 1] = 0x5Cu8
 				opos += 2
+			else if c == 0x3Fu8 then
+				var j = pos + 1
+				if j < length then
+					var next = its[j]
+					# We ignore `??'` because it will be escaped as `??\'`.
+					if
+						next == 0x21u8 or
+						next == 0x28u8 or
+						next == 0x29u8 or
+						next == 0x2Du8 or
+						next == 0x2Fu8 or
+						next == 0x3Cu8 or
+						next == 0x3Du8 or
+						next == 0x3Eu8
+					then
+						nns[opos] = 0x5Cu8
+						opos += 1
+					end
+				end
+				nns[opos] = 0x3Fu8
+				opos += 1
 			else if c < 32u8 then
 				nns[opos] = 0x5Cu8
 				nns[opos + 1] = 0x30u8
@@ -292,7 +333,49 @@ redef class FlatText
 	end
 
 	redef fun [](index) do
-		assert index >= 0 and index < _length
+		var len = _length
+
+		# Statistically:
+		# * ~70% want the next char
+		# * ~23% want the previous
+		# * ~7% want the same char
+		#
+		# So it makes sense to shortcut early. And early is here.
+		var dpos = index - _position
+		var b = _bytepos
+		if dpos == 1 and index < len - 1 then
+			var its = _items
+			var c = its[b]
+			if c & 0x80u8 == 0x00u8 then
+				# We want the next, and current is easy.
+				# So next is easy to find!
+				b += 1
+				_position = index
+				_bytepos = b
+				# The rest will be done by `dpos==0` below.
+				dpos = 0
+			end
+		else if dpos == -1 and index > 1 then
+			var its = _items
+			var c = its[b-1]
+			if c & 0x80u8 == 0x00u8 then
+				# We want the previous, and it is easy.
+				b -= 1
+				dpos = 0
+				_position = index
+				_bytepos = b
+				return c.ascii
+			end
+		end
+		if dpos == 0 and index >= 0 and index < len then
+			# We know what we want (+0 or +1) and it is in bounds: get it now!
+			var its = _items
+			var c = its[b]
+			if c & 0x80u8 == 0x00u8 then return c.ascii
+			return items.char_at(b)
+		end
+
+		assert index >= 0 and index < len
+		return fetch_char_at(index)
 	end
 
@@ -323,10 +406,14 @@ redef class FlatText
 		end
 		return res
 	end
+
+	redef fun copy_to_native(dst, n, src_off, dst_off) do
+		_items.copy_to(dst, n, first_byte + src_off, dst_off) # `src_off` is shifted by `first_byte` before indexing `_items`
+	end
 end
 
 # Immutable strings of characters.
-class FlatString
+abstract class FlatString
 	super FlatText
 	super String
 
@@ -359,22 +446,13 @@ class FlatString
 
 	redef fun fast_cstring do return _items.fast_cstring(_first_byte)
 
-	redef fun substring_from(from) do
-		if from >= self._length then return empty
-		if from <= 0 then return self
-		var c = char_to_byte_index(from)
-		var st = c - _first_byte
-		var fln = bytelen - st
-		return new FlatString.full(items, fln, c, _length - from)
-	end
-
 	redef fun substring(from, count)
 	do
 		if count <= 0 then return ""
 
 		if from < 0 then
 			count += from
-			if count < 0 then return ""
+			if count <= 0 then return ""
 			from = 0
 		end
 
@@ -452,26 +530,21 @@ class FlatString
 	#
 	# `_items` will be used as is, without copy, to retrieve the characters of the string.
 	# Aliasing issues is the responsibility of the caller.
-	private init with_infos(items: NativeString, bytelen, from: Int)
+	private new with_infos(items: NativeString, bytelen, from: Int)
 	do
-		self._items = items
-		self._bytelen = bytelen
-		_first_byte = from
-		_bytepos = from
-		_length = _items.utf8_length(_first_byte, bytelen)
+		var len = items.utf8_length(from, bytelen) # length of the `bytelen` bytes starting at `from`
+		if bytelen == len then return new ASCIIFlatString.full_data(items, bytelen, from, len) # equal lengths select the ASCII variant
+		return new UnicodeFlatString.full_data(items, bytelen, from, len)
 	end
 
 	# Low-level creation of a new string with all the data.
 	#
 	# `_items` will be used as is, without copy, to retrieve the characters of the string.
 	# Aliasing issues is the responsibility of the caller.
-	private init full(items: NativeString, bytelen, from, length: Int)
+	private new full(items: NativeString, bytelen, from, length: Int)
 	do
-		self._items = items
-		self._length = length
-		self._bytelen = bytelen
-		_first_byte = from
-		_bytepos = from
+		if bytelen == length then return new ASCIIFlatString.full_data(items, bytelen, from, length) # `length` is taken from the caller, not recomputed
+		return new UnicodeFlatString.full_data(items, bytelen, from, length)
 	end
 
 	redef fun ==(other)
@@ -568,7 +641,6 @@ class FlatString
 		return new FlatString.full(ns, new_bytelen, 0, newlen)
 	end
 
-
 	redef fun hash
 	do
 		if hash_cache == null then
@@ -593,6 +665,80 @@ class FlatString
 	redef fun substrings do return new FlatSubstringsIter(self)
 end
 
+# Regular Nit UTF-8 strings
+private class UnicodeFlatString
+	super FlatString
+
+	init full_data(items: NativeString, bytelen, from, length: Int) do
+		self._items = items
+		self._length = length
+		self._bytelen = bytelen
+		_first_byte = from # offset of the first byte of this string inside `items`
+		_bytepos = from # the byte cursor starts at the beginning of the string
+	end
+
+	redef fun substring_from(from) do
+		if from >= self._length then return empty
+		if from <= 0 then return self
+		var c = char_to_byte_index(from) # byte offset of char `from`
+		var st = c - _first_byte # bytes skipped from the start of this string
+		var fln = bytelen - st # bytes left from `from` to the end
+		return new FlatString.full(items, fln, c, _length - from) # passes the same `items`, only offsets and lengths change
+	end
+end
+
+# Special cases of String where all the characters are ASCII-based
+#
+# Optimizes access operations to O(1) complexity.
+private class ASCIIFlatString
+	super FlatString
+
+	init full_data(items: NativeString, bytelen, from, length: Int) do
+		self._items = items
+		self._length = length
+		self._bytelen = bytelen
+		_first_byte = from # offset of the first byte of this string inside `items`
+		_bytepos = from
+	end
+
+	redef fun [](idx) do
+		assert idx < _bytelen and idx >= 0 # char index is bounded by the byte length
+		return _items[idx + _first_byte].ascii # direct byte lookup, shifted by the start offset
+	end
+
+	redef fun substring(from, count) do
+		var ln = _length
+		if count <= 0 then return ""
+		if (count + from) > ln then count = ln - from # clamp the span to the end of the string
+		if count <= 0 then return ""
+		if from < 0 then
+			count += from # a negative start shortens the span by the part before index 0
+			if count <= 0 then return ""
+			from = 0
+		end
+		return new ASCIIFlatString.full_data(_items, count, from + _first_byte, count) # byte length and char length are both `count`
+	end
+
+	redef fun reversed do
+		var b = new FlatBuffer.with_capacity(_bytelen + 1)
+		var i = _length - 1 # walk from the last char back to the first
+		while i >= 0 do
+			b.add self[i]
+			i -= 1
+		end
+		var s = b.to_s.as(FlatString)
+		return s
+	end
+
+	redef fun char_to_byte_index(index) do return index + _first_byte # char index maps straight to a byte offset
+
+	redef fun substring_impl(from, count, end_index) do
+		return new ASCIIFlatString.full_data(_items, count, from + _first_byte, count) # `end_index` is not used here
+	end
+
+	redef fun fetch_char_at(i) do return _items[i + _first_byte].ascii
+end
+
 private class FlatStringCharReverseIterator
 	super IndexedIterator[Char]
 
@@ -817,9 +963,12 @@ class FlatBuffer
 
 	redef fun clear do
 		is_dirty = true
-		if written then reset
 		_bytelen = 0
 		_length = 0
+		if written then
+			_capacity = 16
+			reset
+		end
 	end
 
 	redef fun empty do return new Buffer
@@ -828,12 +977,13 @@ class FlatBuffer
 	do
 		var c = capacity
 		if cap <= c then return
-		while c <= cap do c = c * 2 + 2
+		if c <= 16 then c = 16
+		while c <= cap do c = c * 2
 		# The COW flag can be set at false here, since
 		# it does a copy of the current `Buffer`
 		written = false
 		var bln = _bytelen
-		var a = new NativeString(c+1)
+		var a = new NativeString(c)
 		if bln > 0 then
 			var it = _items
 			if bln > 0 then it.copy_to(a, bln, 0, 0)
@@ -885,22 +1035,17 @@ class FlatBuffer
 	init from(s: Text)
 	do
 		_items = new NativeString(s.bytelen)
-		if s isa FlatText then
-			_items = s._items
-		else
-			for i in substrings do i.as(FlatString)._items.copy_to(_items, i._bytelen, 0, 0)
-		end
+		for i in s.substrings do i._items.copy_to(_items, i._bytelen, first_byte, 0) # NOTE(review): `first_byte` is the receiver's, not `i`'s, and every chunk is written at offset 0 - confirm this is right for substrings and for texts yielding several chunks
 		_bytelen = s.bytelen
 		_length = s.length
 		_capacity = _bytelen
-		written = true
 	end
 
 	# Create a new empty string with a given capacity.
 	init with_capacity(cap: Int)
 	do
 		assert cap >= 0
-		_items = new NativeString(cap + 1)
+		_items = new NativeString(cap)
 		capacity = cap
 		_bytelen = 0
 	end
@@ -948,6 +1093,21 @@ class FlatBuffer
 		return new FlatBuffer.with_infos(r_items, byte_length, byte_length, count)
 	end
 
+	redef fun append_substring_impl(s, from, length) do
+		if length <= 0 then return
+		if not s isa FlatText then
+			super
+			return
+		end
+		var bytest = s.char_to_byte_index(from)
+		var bytend = s.char_to_byte_index(from + length - 1) # first byte of the last char to copy
+		var btln = bytend - bytest + s._items.length_of_char_at(bytend) # count every byte of that last char, not just one
+		enlarge(btln + _bytelen)
+		s._items.copy_to(_items, btln, bytest, _bytelen)
+		_bytelen += btln
+		_length += length
+	end
+
 	redef fun reverse
 	do
 		written = false
@@ -1122,8 +1282,7 @@ redef class NativeString
 		return to_s_with_length(cstring_length)
 	end
 
-	# Returns `self` as a String of `length`.
-	redef fun to_s_with_length(length): FlatString
+	redef fun to_s_with_length(length)
 	do
 		assert length >= 0
 		return clean_utf8(length)
@@ -1138,10 +1297,11 @@ redef class NativeString
 		return new FlatString.with_infos(self, len, 0)
 	end
 
-	# Returns `self` as a new String.
-	redef fun to_s_with_copy: FlatString
+	redef fun to_s_with_copy do return to_s_with_copy_and_length(cstring_length)
+
+	# Get a `String` from `length` bytes at `self` copied into Nit memory
+	fun to_s_with_copy_and_length(length: Int): String
 	do
-		var length = cstring_length
 		var r = clean_utf8(length)
 		if r.items != self then return r
 		var new_self = new NativeString(length + 1)
@@ -1250,48 +1410,29 @@ redef class NativeString
 	#
 	# Very unsafe, make sure to have room for this char prior to calling this function.
 	private fun set_char_at(pos: Int, c: Char) do
-		if c.code_point < 128 then
-			self[pos] = c.code_point.to_b
+		var cp = c.code_point
+		if cp < 128 then
+			self[pos] = cp.to_b
 			return
 		end
 		var ln = c.u8char_len
-		native_set_char(pos, c, ln)
-	end
-
-	private fun native_set_char(pos: Int, c: Char, ln: Int) `{
-		char* dst = self + pos;
-		switch(ln){
-			case 1:
-				dst[0] = c;
-				break;
-			case 2:
-				dst[0] = 0xC0 | ((c & 0x7C0) >> 6);
-				dst[1] = 0x80 | (c & 0x3F);
-				break;
-			case 3:
-				dst[0] = 0xE0 | ((c & 0xF000) >> 12);
-				dst[1] = 0x80 | ((c & 0xFC0) >> 6);
-				dst[2] = 0x80 | (c & 0x3F);
-				break;
-			case 4:
-				dst[0] = 0xF0 | ((c & 0x1C0000) >> 18);
-				dst[1] = 0x80 | ((c & 0x3F000) >> 12);
-				dst[2] = 0x80 | ((c & 0xFC0) >> 6);
-				dst[3] = 0x80 | (c & 0x3F);
-				break;
-		}
-	`}
+		if ln == 2 then
+			self[pos] = (0xC0 | ((cp & 0x7C0) >> 6)).to_b
+			self[pos + 1] = (0x80 | (cp & 0x3F)).to_b
+		else if ln == 3 then
+			self[pos] = (0xE0 | ((cp & 0xF000) >> 12)).to_b
+			self[pos + 1] = (0x80 | ((cp & 0xFC0) >> 6)).to_b
+			self[pos + 2] = (0x80 | (cp & 0x3F)).to_b
+		else if ln == 4 then
+			self[pos] = (0xF0 | ((cp & 0x1C0000) >> 18)).to_b
+			self[pos + 1] = (0x80 | ((cp & 0x3F000) >> 12)).to_b
+			self[pos + 2] = (0x80 | ((cp & 0xFC0) >> 6)).to_b
+			self[pos + 3] = (0x80 | (cp & 0x3F)).to_b
+		end
+	end
 end
 
 redef class Int
-	redef fun to_base(base, signed)
-	do
-		var l = digit_count(base)
-		var s = new FlatBuffer.from(" " * l)
-		fill_buffer(s, base, signed)
-		return s.to_s
-	end
-
 	# return displayable int in base 10 and signed
 	#
 	#     assert 1.to_s            == "1"