In regard to #1262, and after many rewrites, here's a fully functional prototype for UTF-8 compliant Strings!
Many things have changed and now, operations on String will need to be considered differently to avoid bad surprises, namely:
- FlatBuffers are slow as hell when getting chars or modifying in-place (and an in-place modification can degenerate if a single-byte char is replaced by a multibyte char)
- Length is now potentially expensive (especially within FlatBuffer where it is not cached)
- Indexed access is now O(n), though it is cached for local accesses (except in FlatBuffer)
Performance, measured as the user time of `nitc src/nitc.nit -o bin/nitc`, has gone from 4.55s to 4.80s, i.e. +5.5%; we have seen worse.
So a little slowdown, but definitely acceptable when introducing UTF-8 in Strings.
Pull-Request: #1277
Reviewed-by: Jean Privat <jean@pryen.org>
Reviewed-by: Romain Chanoir <chanoir.romain@courrier.uqam.ca>
Reviewed-by: Alexandre Terrasa <alexandre@moz-code.org>
dfa_state = -1
else
var c = string[sp].ascii
+ if c >= 255 then c = 255
sp += 1
var cr = _cr
var bns: NativeString is noinit
redef var length is noinit
+ # Unsafe, but since it is an experiment, don't mind
+ redef fun bytelen do return length
+
redef fun empty do return new Leaf(new ManualBuffer)
redef fun to_cstring do
var bpos = buf.pos
var sits = s.items
if bpos == mlen then
- sits.copy_to(buf.ns, slen, s.index_from, bpos)
+ sits.copy_to(buf.ns, slen, s.first_byte, bpos)
buf.pos = bpos + slen
return new Leaf(buf)
else
var b = new ManualBuffer
var nbns = b.ns
bns.copy_to(nbns, mlen, 0, 0)
- sits.copy_to(nbns, slen, s.index_from, mlen)
+ sits.copy_to(nbns, slen, s.first_byte, mlen)
b.pos = nlen
return new Leaf(b)
end
for i in substrings do
var ilen = i.length
if i isa FlatString then
- i.items.copy_to(ns, ilen, i.index_from, off)
+ i.items.copy_to(ns, ilen, i.first_byte, off)
else if i isa Leaf then
i.buf.ns.copy_to(ns, ilen, 0, off)
else
if s isa FlatString then
if slen + mlen > maxlen then return new Concat(self, s)
var mits = items
- var sifrom = s.index_from
- var mifrom = index_from
+ var sifrom = s.first_byte
+ var mifrom = first_byte
var sits = s.items
var b = new ManualBuffer
var bns = b.ns
return new Concat(sl + self, s.right)
else if s isa Leaf then
if slen + mlen > maxlen then return new Concat(self, s)
- var mifrom = index_from
+ var mifrom = first_byte
var sb = s.buf
var b = new ManualBuffer
var bns = b.ns
var tmp = na[i]
var tpl = tmp.length
if tmp isa FlatString then
- tmp.items.copy_to(ns, tpl, tmp.index_from, off)
+ tmp.items.copy_to(ns, tpl, tmp.first_byte, off)
off += tpl
else
for j in tmp.substrings do
var slen = j.length
if j isa FlatString then
- j.items.copy_to(ns, slen, j.index_from, off)
+ j.items.copy_to(ns, slen, j.first_byte, off)
else if j isa Leaf then
j.buf.ns.copy_to(ns, slen, 0, off)
end
s += "n{object_id} -> n{str.object_id} [label = \"str\"];\n"
s += str.internal_to_dot
s += "n{object_id} -> n{ns.object_id} [label = \"ns\"];\n"
- s += "n{ns.object_id}[label = \"NativeString\", content=\"{ns.to_s_with_length(rpos)}\"];\n"
+ s += "n{ns.object_id}[label = \"Items\", content=\"{ns}\"];\n"
return s
end
end
redef class FlatString
redef fun internal_to_dot: String
do
- return "n{object_id} [label=\"FlatString\\nindex_from = {index_from}\\nindex_to = {index_to}\\nNativeString = {items.to_s_with_length(items.cstring_length)}\"];\n"
+ return "n{object_id} [label=\"FlatString\\nlength = {length}\\nbytelen = {bytelen}\\nfirst_byte = {first_byte}\\nlast_byte = {last_byte}\\nText = {self.escape_to_dot}\"];\n"
end
end
redef class FlatBuffer
redef fun internal_to_dot: String
do
- return "n{object_id} [label=\"FlatBuffer\\length = {length}\\ncapacity = {capacity}\\nitems = {items.to_s_with_length(items.cstring_length)}\"];\n"
+ return "n{object_id} [label=\"FlatBuffer\\nbytelen = {bytelen}\\nlength = {length}\\ncapacity = {capacity}\\nText = {escape_to_dot}\"];\n"
end
end
private fun write_native_to(s: FileWriter)
do
- for i in substrings do s.write_native(i.to_cstring, 0, i.length)
+ for i in substrings do s.write_native(i.to_cstring, 0, i.bytelen)
end
end
redef class FlatString
redef fun write_native_to(s)
do
- s.write_native(items, index_from, length)
+ s.write_native(items, first_byte, bytelen)
end
end
redef type OTHER: Char
redef fun object_id is intern
+ redef fun output `{
+ if(self < 128){
+ printf("%c", self);
+ }else if(self < 2048){
+ printf("%c%c", 0xC0 | ((0x7C0 & self) >> 6), 0x80 | (0x3F & self));
+ }else if(self < 65536){
+ printf("%c%c%c", 0xE0 | ((0xF000 & self) >> 12), 0x80 | ((0xFC0 & self) >> 6) ,0x80 | (0x3F & self));
+ }else if(self < 2097152){
+ printf("%c%c%c%c", 0xF0 | ((0x1C0000 & self) >> 18), 0x80 | ((0x3F000 & self) >> 12), 0x80 | ((0xFC0 & self) >> 6), 0x80 | (0x3F & self));
+ }else{
+ // Bad char
+ printf("%c", self);
+ }
+ `}
redef fun hash do return ascii
redef fun ==(o) is intern
redef fun !=(o) is intern
- redef fun output is intern
redef fun <=(i) is intern
redef fun <(i) is intern
# if there is something to append
if i > _buffer_pos then
# Enlarge the string (if needed)
- s.enlarge(s.length + i - _buffer_pos)
+ s.enlarge(s.bytelen + i - _buffer_pos)
# Copy from the buffer to the string
var j = _buffer_pos
return new Bytes(nns, nslen, nslen)
end
- redef fun eof do return cursor >= source.length
+ redef fun eof do return cursor >= source.bytelen
end
#
# assert "12345".length == 5
# assert "".length == 0
+ # assert "あいうえお".length == 5
fun length: Int is abstract
# Number of bytes in `self`
#
- # TODO: Implement correctly once UTF-8 is supported
- fun bytelen: Int do return length
+ # assert "12345".bytelen == 5
+ # assert "あいうえお".bytelen == 15
+ fun bytelen: Int is abstract
# Create a substring.
#
# assert "abcd".substring(-1, 2) == "a"
# assert "abcd".substring(1, 0) == ""
# assert "abcd".substring(2, 5) == "cd"
+ # assert "あいうえお".substring(1,3) == "いうえ"
#
# A `from` index < 0 will be replaced by 0.
# Unless a `count` value is > 0 at the same time.
# Real items, used as cache for to_cstring is called
private var real_items: nullable NativeString = null
- # Returns a char* starting at position `index_from`
+ # Returns a char* starting at position `first_byte`
#
# WARNING: If you choose to use this service, be careful of the following.
#
redef var length = 0
+ redef var bytelen = 0
+
redef fun output
do
var i = 0
redef fun iterator do return self.iterator_from(0)
- redef fun reverse_iterator do return self.reverse_iterator_from(self.length - 1)
+ redef fun reverse_iterator do return self.reverse_iterator_from(target.bytelen - 1)
end
# Immutable sequence of characters.
super Sequence[Byte]
redef type SELFTYPE: Buffer
-
end
redef class Object
end
redef class Char
+
+ # Length of `self` in a UTF-8 String
+ private fun u8char_len: Int do
+ var c = self.ascii
+ if c < 0x80 then return 1
+ if c <= 0x7FF then return 2
+ if c <= 0xFFFF then return 3
+ if c <= 0x10FFFF then return 4
+ # Bad character format
+ return 1
+ end
+
# assert 'x'.to_s == "x"
- redef fun to_s
- do
- var s = new Buffer.with_cap(1)
- s.chars[0] = self
- return s.to_s
+ redef fun to_s do
+ var ln = u8char_len
+ var ns = new NativeString(ln + 1)
+ u8char_tos(ns, ln)
+ return ns.to_s_with_length(ln)
end
+ private fun u8char_tos(r: NativeString, len: Int) `{
+ r[len] = '\0';
+ switch(len){
+ case 1:
+ r[0] = self;
+ break;
+ case 2:
+ r[0] = 0xC0 | ((self & 0x7C0) >> 6);
+ r[1] = 0x80 | (self & 0x3F);
+ break;
+ case 3:
+ r[0] = 0xE0 | ((self & 0xF000) >> 12);
+ r[1] = 0x80 | ((self & 0xFC0) >> 6);
+ r[2] = 0x80 | (self & 0x3F);
+ break;
+ case 4:
+ r[0] = 0xF0 | ((self & 0x1C0000) >> 18);
+ r[1] = 0x80 | ((self & 0x3F000) >> 12);
+ r[2] = 0x80 | ((self & 0xFC0) >> 6);
+ r[3] = 0x80 | (self & 0x3F);
+ break;
+ }
+ `}
+
# Returns true if the char is a numerical digit
#
# assert '0'.is_numeric
# assert '9'.is_numeric
# assert not 'a'.is_numeric
# assert not '?'.is_numeric
+ #
+ # FIXME: Works on ASCII-range only
fun is_numeric: Bool
do
return self >= '0' and self <= '9'
# assert 'Z'.is_alpha
# assert not '0'.is_alpha
# assert not '?'.is_alpha
+ #
+ # FIXME: Works on ASCII-range only
fun is_alpha: Bool
do
return (self >= 'a' and self <= 'z') or (self >= 'A' and self <= 'Z')
# assert '0'.is_alphanumeric
# assert '9'.is_alphanumeric
# assert not '?'.is_alphanumeric
+ #
+ # FIXME: Works on ASCII-range only
fun is_alphanumeric: Bool
do
return self.is_numeric or self.is_alpha
module flat
intrude import abstract_text
+intrude import native
`{
#include <stdio.h>
super FlatText
super String
- # Index in _items of the start of the string
- private var index_from: Int is noinit
+ # Index at which `self` begins in `items`, inclusively
+ private var first_byte: Int is noinit
- # Indes in _items of the last item of the string
- private var index_to: Int is noinit
+ # Index at which `self` ends in `items`, inclusively
+ private var last_byte: Int is noinit
redef var chars = new FlatStringCharView(self) is lazy
redef var bytes = new FlatStringByteView(self) is lazy
- redef fun [](index)
- do
- # Check that the index (+ index_from) is not larger than indexTo
- # In other terms, if the index is valid
- assert index >= 0
- assert (index + index_from) <= index_to
- return items[index + index_from].to_i.ascii
+ # Cache of the latest position (char) explored in the string
+ var position: Int = 0
+ # Cached position (bytes) in the NativeString underlying the String
+ var bytepos: Int = first_byte is lateinit
+
+ redef var length is lazy do
+ if bytelen == 0 then return 0
+ var st = first_byte
+ var its = items
+ var ln = 0
+ var lst = last_byte
+ while st <= lst do
+ st += its.length_of_char_at(st)
+ ln += 1
+ end
+ return ln
end
- ################################################
- # AbstractString specific methods #
- ################################################
+ redef fun [](index) do return items.char_at(char_to_byte_index(index))
+
+ # Index of the character `index` in `items`
+ private fun char_to_byte_index(index: Int): Int do
+ var ln = length
+ assert index >= 0
+ assert index < ln
+
+ # Find best insertion point
+ var delta_begin = index
+ var delta_end = (ln - 1) - index
+ var delta_cache = (position - index).abs
+ var min = delta_begin
+ var its = items
+
+ if delta_cache < min then min = delta_cache
+ if delta_end < min then min = delta_end
+
+ var ns_i: Int
+ var my_i: Int
+
+ if min == delta_begin then
+ ns_i = first_byte
+ my_i = 0
+ else if min == delta_cache then
+ ns_i = bytepos
+ my_i = position
+ else
+ ns_i = its.find_beginning_of_char_at(last_byte)
+ my_i = length - 1
+ end
+
+ ns_i = its.char_to_byte_index_cached(index, my_i, ns_i)
+
+ position = index
+ bytepos = ns_i
+
+ return ns_i
+ end
redef fun reversed
do
- var native = new NativeString(self.length + 1)
- var length = self.length
- var items = self.items
- var pos = 0
- var ipos = length-1
- while pos < length do
- native[pos] = items[ipos]
- pos += 1
- ipos -= 1
+ var b = new FlatBuffer.with_capacity(bytelen + 1)
+ for i in [length - 1 .. 0].step(-1) do
+ b.add self[i]
end
- return native.to_s_with_length(self.length)
+ var s = b.to_s.as(FlatString)
+ s.length = self.length
+ return s
end
- redef fun fast_cstring do return items.fast_cstring(index_from)
+ redef fun fast_cstring do return items.fast_cstring(first_byte)
redef fun substring(from, count)
do
from = 0
end
- var new_from = index_from + from
-
- if (new_from + count) > index_to then
- var new_len = index_to - new_from + 1
- if new_len <= 0 then return empty
- return new FlatString.with_infos(items, new_len, new_from, index_to)
- end
-
- if count <= 0 then return empty
+ if (count + from) > length then count = length - from
+ if count <= 0 then return ""
+ var end_index = from + count - 1
- var to = new_from + count - 1
+ var bytefrom = char_to_byte_index(from)
+ var byteto = char_to_byte_index(end_index)
+ byteto += items.length_of_char_at(byteto) - 1
- return new FlatString.with_infos(items, to - new_from + 1, new_from, to)
+ var s = new FlatString.full(items, byteto - bytefrom + 1, bytefrom, byteto, count)
+ return s
end
redef fun empty do return "".as(FlatString)
# String Specific Methods #
##################################################
- # Low-level creation of a new string with given data.
+ # Low-level creation of a new string with minimal data.
#
# `items` will be used as is, without copy, to retrieve the characters of the string.
# Aliasing issues is the responsibility of the caller.
- private init with_infos(items: NativeString, length: Int, from: Int, to: Int)
+ private init with_infos(items: NativeString, bytelen, from, to: Int)
do
self.items = items
- self.length = length
- index_from = from
- index_to = to
+ self.bytelen = bytelen
+ first_byte = from
+ last_byte = to
end
- redef fun to_cstring
+ # Low-level creation of a new string with all the data.
+ #
+ # `items` will be used as is, without copy, to retrieve the characters of the string.
+ # Aliasing issues is the responsibility of the caller.
+ private init full(items: NativeString, bytelen, from, to, length: Int)
do
- if real_items != null then
- return real_items.as(not null)
- else
- var newItems = new NativeString(length + 1)
- self.items.copy_to(newItems, length, index_from, 0)
- newItems[length] = 0u8
- self.real_items = newItems
- return newItems
- end
+ self.items = items
+ self.length = length
+ self.bytelen = bytelen
+ first_byte = from
+ last_byte = to
+ end
+
+ redef fun to_cstring do
+ if real_items != null then return real_items.as(not null)
+ var new_items = new NativeString(bytelen + 1)
+ self.items.copy_to(new_items, bytelen, first_byte, 0)
+ new_items[bytelen] = 0u8
+ real_items = new_items
+ return new_items
end
redef fun ==(other)
if self.object_id == other.object_id then return true
- var my_length = length
+ var my_length = bytelen
- if other.length != my_length then return false
+ if other.bytelen != my_length then return false
- var my_index = index_from
- var its_index = other.index_from
+ var my_index = first_byte
+ var its_index = other.first_byte
var last_iteration = my_index + my_length
if self.object_id == other.object_id then return false
- var my_curr_char : Char
- var its_curr_char : Char
+ var my_length = self.bytelen
+ var its_length = other.bytelen
- var my_length = self.length
- var its_length = other.length
- var max
+ var max = if my_length < its_length then my_length else its_length
- if my_length < its_length then
- max = my_length
- else
- max = its_length
- end
-
- var my_chars = chars
- var its_chars = other.chars
+ var myits = self.bytes
+ var itsits = other.bytes
- var pos = 0
- while pos < max do
- my_curr_char = my_chars[pos]
- its_curr_char = its_chars[pos]
+ for i in [0 .. max[ do
+ var my_curr_char = myits[i]
+ var its_curr_char = itsits[i]
if my_curr_char != its_curr_char then
if my_curr_char < its_curr_char then return true
return false
end
-
- pos += 1
end
return my_length < its_length
end
- redef fun +(s)
- do
- var my_length = self.length
- var its_length = s.length
-
- var total_length = my_length + its_length
-
- var target_string = new NativeString(my_length + its_length + 1)
-
- self.items.copy_to(target_string, my_length, index_from, 0)
- if s isa FlatString then
- s.items.copy_to(target_string, its_length, s.index_from, my_length)
- else if s isa FlatBuffer then
- s.items.copy_to(target_string, its_length, 0, my_length)
+ redef fun +(o) do
+ var s = o.to_s
+ var slen = s.bytelen
+ var mlen = bytelen
+ var nlen = mlen + slen
+ var mits = items
+ var mifrom = first_byte
+ if s isa FlatText then
+ var sits = s.items
+ var sifrom = s.as(FlatString).first_byte
+ var ns = new NativeString(nlen + 1)
+ mits.copy_to(ns, mlen, mifrom, 0)
+ sits.copy_to(ns, slen, sifrom, mlen)
+ return new FlatString.full(ns, nlen, 0, nlen - 1, length + o.length)
else
- var curr_pos = my_length
- for i in [0 .. s.bytelen[ do
- target_string[curr_pos] = s.bytes[i]
- curr_pos += 1
- end
+ abort
end
-
- target_string[total_length] = 0u8
-
- return target_string.to_s_with_length(total_length)
end
- redef fun *(i)
- do
- assert i >= 0
-
- var my_length = self.length
-
- var final_length = my_length * i
-
- var my_items = self.items
-
- var target_string = new NativeString(final_length + 1)
-
- target_string[final_length] = 0u8
-
- var current_last = 0
-
- for iteration in [1 .. i] do
- my_items.copy_to(target_string, my_length, 0, current_last)
- current_last += my_length
+ redef fun *(i) do
+ var mybtlen = bytelen
+ var new_bytelen = mybtlen * i
+ var mylen = length
+ var newlen = mylen * i
+ var ns = new NativeString(new_bytelen + 1)
+ ns[new_bytelen] = 0u8
+ var offset = 0
+ while i > 0 do
+ items.copy_to(ns, bytelen, first_byte, offset)
+ offset += mybtlen
+ i -= 1
end
-
- return target_string.to_s_with_length(final_length)
+ return new FlatString.full(ns, new_bytelen, 0, new_bytelen - 1, newlen)
end
+
redef fun hash
do
if hash_cache == null then
# djb2 hash algorithm
var h = 5381
- var i = index_from
+ var i = first_byte
var myitems = items
- while i <= index_to do
+ while i <= last_byte do
h = h.lshift(5) + h + myitems[i].to_i
i += 1
end
init with_pos(tgt: FlatString, pos: Int)
do
- init(tgt, tgt.items, pos + tgt.index_from)
+ init(tgt, tgt.items, pos + tgt.first_byte)
end
- redef fun is_ok do return curr_pos >= target.index_from
+ redef fun is_ok do return curr_pos >= target.first_byte
redef fun item do return target_items[curr_pos]
redef fun next do curr_pos -= 1
- redef fun index do return curr_pos - target.index_from
+ redef fun index do return curr_pos - target.first_byte
end
init with_pos(tgt: FlatString, pos: Int)
do
- init(tgt, tgt.items, pos + tgt.index_from)
+ init(tgt, tgt.items, pos + tgt.first_byte)
end
- redef fun is_ok do return curr_pos <= target.index_to
+ redef fun is_ok do return curr_pos <= target.last_byte
redef fun item do return target_items[curr_pos]
redef fun next do curr_pos += 1
- redef fun index do return curr_pos - target.index_from
+ redef fun index do return curr_pos - target.first_byte
end
redef fun [](index)
do
- # Check that the index (+ index_from) is not larger than indexTo
+ # Check that the index (+ first_byte) is not larger than last_byte
# In other terms, if the index is valid
assert index >= 0
var target = self.target
- assert (index + target.index_from) <= target.index_to
- return target.items[index + target.index_from]
+ assert (index + target.first_byte) <= target.last_byte
+ return target.items[index + target.first_byte]
end
redef fun iterator_from(start) do return new FlatStringByteIterator.with_pos(target, start)
redef var bytes: Sequence[Byte] = new FlatBufferByteView(self) is lazy
- private var capacity: Int = 0
+ redef var bytelen = 0
+
+ # O(n)
+ redef fun length do
+ var max = bytelen
+ if max == 0 then return 0
+ var pos = 0
+ var ln = 0
+ var its = items
+ while pos < max do
+ pos += its.length_of_char_at(pos)
+ ln += 1
+ end
+ return ln
+ end
+
+ private var capacity = 0
redef fun fast_cstring do return items.fast_cstring(0)
# the Copy-On-Write flag `written` is set at true.
private fun reset do
var nns = new NativeString(capacity)
- items.copy_to(nns, length, 0, 0)
+ items.copy_to(nns, bytelen, 0, 0)
items = nns
written = false
end
- redef fun [](index)
+ # Shifts the content of the buffer by `len` bytes to the right, starting at byte `from`
+ #
+ # Internal only, does not modify bytelen or length, this is the caller's responsibility
+ private fun rshift_bytes(from: Int, len: Int) do
+ var oit = items
+ var nit = items
+ if bytelen + len > capacity then
+ capacity = capacity * 2 + 2
+ nit = new NativeString(capacity)
+ oit.copy_to(nit, 0, 0, from)
+ end
+ oit.copy_to(nit, bytelen - from, from, from + len)
+ end
+
+ # Shifts the content of the buffer by `len` bytes to the left, starting at `from`
+ #
+ # Internal only, does not modify bytelen or length, this is the caller's responsibility
+ private fun lshift_bytes(from: Int, len: Int) do
+ items.copy_to(items, bytelen - from, from, from - len)
+ end
+
+ redef fun [](i)
do
- assert index >= 0
- assert index < length
- return items[index].to_i.ascii
+ assert i < length and i >= 0
+ return items.char_at(items.char_to_byte_index(i))
end
redef fun []=(index, item)
do
+ assert index >= 0 and index <= length
+ if written then reset
is_dirty = true
if index == length then
- add(item)
+ add item
return
end
- if written then reset
- assert index >= 0 and index < length
- items[index] = item.ascii.to_b
+ var ip = items.char_to_byte_index(index)
+ var c = items.char_at(ip)
+ var clen = c.u8char_len
+ var itemlen = item.u8char_len
+ var size_diff = itemlen - clen
+ if size_diff > 0 then
+ rshift_bytes(ip + clen, size_diff)
+ else if size_diff < 0 then
+ lshift_bytes(ip + clen, -size_diff)
+ end
+ bytelen += size_diff
+ items.set_char_at(ip, item)
end
redef fun add(c)
do
+ if written then reset
is_dirty = true
- if capacity <= length then enlarge(length + 5)
- items[length] = c.ascii.to_b
- length += 1
+ var clen = c.u8char_len
+ enlarge(bytelen + clen)
+ items.set_char_at(bytelen, c)
+ bytelen += clen
end
private fun add_byte(b: Byte) do
+ if written then reset
is_dirty = true
- if capacity <= length then enlarge(length + 5)
+ enlarge(bytelen + 1)
items[bytelen] = b
- length += 1
+ # FIXME: Might trigger errors
+ bytelen += 1
end
redef fun clear do
is_dirty = true
if written then reset
- length = 0
+ bytelen = 0
end
redef fun empty do return new Buffer
# it does a copy of the current `Buffer`
written = false
var a = new NativeString(c+1)
- if length > 0 then items.copy_to(a, length, 0, 0)
+ if bytelen > 0 then items.copy_to(a, bytelen, 0, 0)
items = a
capacity = c
end
redef fun to_s
do
written = true
- if length == 0 then items = new NativeString(1)
- return new FlatString.with_infos(items, length, 0, length - 1)
+ if bytelen == 0 then items = new NativeString(1)
+ return new FlatString.with_infos(items, bytelen, 0, bytelen - 1)
end
redef fun to_cstring
do
if is_dirty then
- var new_native = new NativeString(length + 1)
- new_native[length] = 0u8
- if length > 0 then items.copy_to(new_native, length, 0, 0)
+ var new_native = new NativeString(bytelen + 1)
+ new_native[bytelen] = 0u8
+ if length > 0 then items.copy_to(new_native, bytelen, 0, 0)
real_items = new_native
is_dirty = false
end
#
# If `items` is shared, `written` should be set to true after the creation
# so that a modification will do a copy-on-write.
- private init with_infos(items: NativeString, capacity, length: Int)
+ private init with_infos(items: NativeString, capacity, bytelen: Int)
do
self.items = items
- self.length = length
self.capacity = capacity
+ self.bytelen = bytelen
end
# Create a new string copied from `s`.
init from(s: Text)
do
- capacity = s.length + 1
- length = s.length
- items = new NativeString(capacity)
- if s isa FlatString then
- s.items.copy_to(items, length, s.index_from, 0)
- else if s isa FlatBuffer then
- s.items.copy_to(items, length, 0, 0)
+ items = new NativeString(s.bytelen)
+ if s isa FlatText then
+ items = s.items
else
- var curr_pos = 0
- for i in s.bytes do
- items[curr_pos] = i
- curr_pos += 1
- end
+ for i in substrings do i.as(FlatString).items.copy_to(items, i.bytelen, 0, 0)
end
+ bytelen = s.bytelen
+ capacity = s.bytelen
+ written = true
end
# Create a new empty string with a given capacity.
init with_capacity(cap: Int)
do
assert cap >= 0
- items = new NativeString(cap+1)
+ items = new NativeString(cap + 1)
capacity = cap
- length = 0
+ bytelen = 0
end
redef fun append(s)
do
if s.is_empty then return
is_dirty = true
- var sl = s.length
- if capacity < length + sl then enlarge(length + sl)
+ var sl = s.bytelen
+ enlarge(bytelen + sl)
if s isa FlatString then
- s.items.copy_to(items, sl, s.index_from, length)
+ s.items.copy_to(items, sl, s.first_byte, bytelen)
else if s isa FlatBuffer then
- s.items.copy_to(items, sl, 0, length)
+ s.items.copy_to(items, sl, 0, bytelen)
else
- var curr_pos = self.length
- for i in s.bytes do
- items[curr_pos] = i
- curr_pos += 1
- end
+ for i in s.substrings do append i
+ return
end
- length += sl
+ bytelen += sl
end
# Copies the content of self in `dest`
redef fun substring(from, count)
do
assert count >= 0
- count += from
if from < 0 then from = 0
- if count > length then count = length
- if from < count then
- var len = count - from
- var r_items = new NativeString(len)
- items.copy_to(r_items, len, from, 0)
- var r = new FlatBuffer.with_infos(r_items, len, len)
- return r
+ if (from + count) > length then count = length - from
+ if count != 0 then
+ var bytefrom = items.char_to_byte_index(from)
+ var byteto = items.char_to_byte_index(count + from - 1)
+ byteto += items.char_at(byteto).u8char_len - 1
+ var byte_length = byteto - bytefrom + 1
+ var r_items = new NativeString(byte_length)
+ items.copy_to(r_items, byte_length, bytefrom, 0)
+ return new FlatBuffer.with_infos(r_items, byte_length, byte_length)
else
return new Buffer
end
redef fun reverse
do
written = false
- var ns = new NativeString(capacity)
- var si = length - 1
- var ni = 0
- var it = items
- while si >= 0 do
- ns[ni] = it[si]
- ni += 1
- si -= 1
- end
- items = ns
+ var ns = new FlatBuffer.with_capacity(capacity)
+ for i in chars.reverse_iterator do ns.add i
+ items = ns.items
end
redef fun times(repeats)
do
- var x = new FlatString.with_infos(items, length, 0, length - 1)
- for i in [1..repeats[ do
+ var x = new FlatString.with_infos(items, bytelen, 0, bytelen - 1)
+ for i in [1 .. repeats[ do
append(x)
end
end
redef fun upper
do
if written then reset
- var id = length - 1
- while id >= 0 do
- self[id] = self[id].to_upper
- id -= 1
- end
+ for i in [0 .. length[ do self[i] = self[i].to_upper
end
redef fun lower
do
if written then reset
- var id = length - 1
- while id >= 0 do
- self[id] = self[id].to_lower
- id -= 1
- end
+ for i in [0 .. length[ do self[i] = self[i].to_lower
end
end
redef fun index do return curr_pos
- redef fun is_ok do return curr_pos < target.length
+ redef fun is_ok do return curr_pos < target.bytelen
redef fun item do return target_items[curr_pos]
str.real_items = new_self
return str
end
+
+ # Sets the next bytes at position `pos` to the value of `c`, encoded in UTF-8
+ #
+ # Very unsafe, make sure to have room for this char prior to calling this function.
+ private fun set_char_at(pos: Int, c: Char) do
+ var ln = c.u8char_len
+ native_set_char(pos, c, ln)
+ end
+
+ private fun native_set_char(pos: Int, c: Char, ln: Int) `{
+ char* dst = self + pos;
+ switch(ln){
+ case 1:
+ dst[0] = c;
+ break;
+ case 2:
+ dst[0] = 0xC0 | ((c & 0x7C0) >> 6);
+ dst[1] = 0x80 | (c & 0x3F);
+ break;
+ case 3:
+ dst[0] = 0xE0 | ((c & 0xF000) >> 12);
+ dst[1] = 0x80 | ((c & 0xFC0) >> 6);
+ dst[2] = 0x80 | (c & 0x3F);
+ break;
+ case 4:
+ dst[0] = 0xF0 | ((c & 0x1C0000) >> 18);
+ dst[1] = 0x80 | ((c & 0x3F000) >> 12);
+ dst[2] = 0x80 | ((c & 0xFC0) >> 6);
+ dst[3] = 0x80 | (c & 0x3F);
+ break;
+ }
+ `}
end
redef class Int
var ns = new NativeString(nslen + 1)
ns[nslen] = 0u8
native_int_to_s(ns, nslen + 1)
- return ns.to_s_with_length(nslen)
+ return new FlatString.full(ns, nslen, 0, nslen - 1, nslen)
end
end
continue
end
var tmp = itsi.to_s
- sl += tmp.length
+ sl += tmp.bytelen
na[mypos] = tmp
i += 1
mypos += 1
var off = 0
while i < mypos do
var tmp = na[i]
- var tpl = tmp.length
if tmp isa FlatString then
- tmp.items.copy_to(ns, tpl, tmp.index_from, off)
+ var tpl = tmp.bytelen
+ tmp.items.copy_to(ns, tpl, tmp.first_byte, off)
off += tpl
else
for j in tmp.substrings do
var s = j.as(FlatString)
- var slen = s.length
- s.items.copy_to(ns, slen, s.index_from, off)
+ var slen = s.bytelen
+ s.items.copy_to(ns, slen, s.first_byte, off)
off += slen
end
end
var sl = 0
var mypos = 0
while i < l do
- sl += na[i].length
+ sl += na[i].bytelen
i += 1
mypos += 1
end
var off = 0
while i < mypos do
var tmp = na[i]
- var tpl = tmp.length
if tmp isa FlatString then
- tmp.items.copy_to(ns, tpl, tmp.index_from, off)
+ var tpl = tmp.bytelen
+ tmp.items.copy_to(ns, tpl, tmp.first_byte, off)
off += tpl
else
for j in tmp.substrings do
var s = j.as(FlatString)
- var slen = s.length
- s.items.copy_to(ns, slen, s.index_from, off)
+ var slen = s.bytelen
+ s.items.copy_to(ns, slen, s.first_byte, off)
off += slen
end
end
module native
import kernel
+import math
+
+redef class Byte
+ # Gives the length of the UTF-8 char starting with `self`
+ private fun u8len: Int do
+ if self & 0b1000_0000u8 == 0u8 then
+ return 1
+ else if self & 0b1110_0000u8 == 0b1100_0000u8 then
+ return 2
+ else if self & 0b1111_0000u8 == 0b1110_0000u8 then
+ return 3
+ else if self & 0b1111_1000u8 == 0b1111_0000u8 then
+ return 4
+ else
+ return 1
+ end
+ end
+end
# Native strings are simple C char *
extern class NativeString `{ char* `}
# Parse `self` as a Float.
fun atof: Float `{ return atof(self); `}
+
+ # Gets the UTF-8 char at index `pos`
+ #
+ # `pos` is a byte index into `self`, not an index in Unicode chars
+ #
+ # ~~~raw
+ # assert "かきく".as(FlatString).items.char_at(0) == 'か'
+ # ~~~
+ #
+ # If the char at position pos is an invalid Unicode char,
+ # the Unicode replacement character � (0xFFFD) will be used.
+ #
+ # ~~~raw
+ # assert "かきく".as(FlatString).items.char_at(1) == '�'
+ # ~~~
+ fun char_at(pos: Int): Char `{
+ char c = self[pos];
+ if((c & 0x80) == 0x00) return (uint32_t)c;
+ if(((c & 0xE0) == 0xC0) && ((self[pos + 1] & 0xC0) == 0x80)) return ((((uint32_t)c) & 0x1F) << 6) + ((((uint32_t)self[pos + 1] & 0x3F)));
+ if(((c & 0xF0) == 0xE0) && ((self[pos + 1] & 0xC0) == 0x80) && ((self[pos + 2] & 0xC0) == 0x80)) return ((((uint32_t)c) & 0xF) << 12) + ((((uint32_t)self[pos + 1]) & 0x3F) << 6) + ((((uint32_t)self[pos + 2] & 0x3F)));
+ if(((c & 0xF7) == 0xF0) && ((self[pos + 1] & 0xC0) == 0x80) && ((self[pos + 2] & 0xC0) == 0x80) && ((self[pos + 3] & 0xC0) == 0x80)) return ((((uint32_t)c) & 0x7) << 18) + ((((uint32_t)self[pos + 1]) & 0x3F) << 12) + ((((uint32_t)self[pos + 2]) & 0x3F) << 6) + ((((uint32_t)self[pos + 3] & 0x3F)));
+ return 0xFFFD;
+ `}
+
+ # Gets the byte index of char at position `n` in UTF-8 String
+ fun char_to_byte_index(n: Int): Int do return char_to_byte_index_cached(n, 0, 0)
+
+ # Gets the length of the character at position `pos` (1 if invalid sequence)
+ fun length_of_char_at(pos: Int): Int do
+ var c = self[pos]
+ if c & 0x80u8 == 0x00u8 then
+ return 1
+ else if c & 0xE0u8 == 0xC0u8 and self[pos + 1] & 0xC0u8 == 0x80u8 then
+ return 2
+ else if c & 0xF0u8 == 0xE0u8 and self[pos + 1] & 0xC0u8 == 0x80u8 and self[pos + 2] & 0xC0u8 == 0x80u8 then
+ return 3
+ else if c & 0xF7u8 == 0xF0u8 and self[pos + 1] & 0xC0u8 == 0x80u8 and self[pos + 2] & 0xC0u8 == 0x80u8 and self[pos + 3] & 0xC0u8 == 0x80u8 then
+ return 4
+ else
+ return 1
+ end
+ end
+
+ # Gets the byte index of char at position `n` in UTF-8 String
+ #
+ # `char_from` and `byte_from` are cached values to seek from.
+ #
+ # NOTE: char_from and byte_from are not guaranteed to be valid cache values
+ # It is up to the client to ensure the validity of the information
+ fun char_to_byte_index_cached(n, char_from, byte_from: Int): Int do
+ var ns_i = byte_from
+ var my_i = char_from
+
+ while my_i < n do
+ ns_i += length_of_char_at(ns_i)
+ my_i += 1
+ end
+
+ while my_i > n do
+ ns_i = find_beginning_of_char_at(ns_i - 1)
+ my_i -= 1
+ end
+
+ return ns_i
+ end
+
+ # Returns the beginning position of the char at position `pos`
+ #
+ # If the char is invalid UTF-8, `pos` is returned as-is
+ #
+ # ~~~raw
+ # assert "abc".items.find_beginning_of_char_at(2) == 2
+ # assert "か".items.find_beginning_of_char_at(1) == 0
+ # assert [0x41u8, 233u8].to_s.items.find_beginning_of_char_at(1) == 1
+ # ~~~
+ fun find_beginning_of_char_at(pos: Int): Int do
+ var endpos = pos
+ var c = self[pos]
+ while c & 0xC0u8 == 0x80u8 do
+ pos -= 1
+ c = self[pos]
+ end
+ var stpos = pos
+ if length_of_char_at(stpos) >= (endpos - stpos + 1) then return pos
+ return endpos
+ end
end
redef var length is noinit
+ redef var bytelen is noinit
+
redef fun substrings do return new RopeSubstrings(self)
redef fun empty do return ""
redef var to_cstring is lazy do
- var len = length
+ var len = bytelen
var ns = new NativeString(len + 1)
ns[len] = 0u8
var off = 0
for i in substrings do
- var ilen = i.length
- i.as(FlatString).items.copy_to(ns, ilen, i.as(FlatString).index_from, off)
+ var ilen = i.bytelen
+ i.as(FlatString).items.copy_to(ns, ilen, i.as(FlatString).first_byte, off)
off += ilen
end
return ns
init do
length = left.length + right.length
+ bytelen = left.bytelen + right.bytelen
end
redef fun output do
redef fun +(o) do
var s = o.to_s
- var slen = s.length
+ var slen = s.bytelen
if s isa Concat then
return new Concat(self, s)
else
var r = right
- var rlen = r.length
+ var rlen = r.bytelen
if rlen + slen > maxlen then return new Concat(self, s)
return new Concat(left, r + s)
end
redef var bytes: Sequence[Byte] is lazy do return new RopeBufferBytes(self)
# The final string being built on the fly
- private var str: String is noinit
+ private var str: String = ""
# Current concatenation buffer
private var ns: NativeString is noinit
# a long string (length > maxlen) is appended.
private var dumped: Int is noinit
- # Length of the complete rope
- redef var length = 0
+ # Length of the complete rope, in chars
+ #
+ # Recomputed on each call: the char length of `str` plus one char for
+ # each sequence found in `ns` between `dumped` and `rpos`, advancing by
+ # the `u8len` of the byte that starts each sequence.
+ redef fun length do
+ 	var count = str.length
+ 	var cursor = dumped
+ 	while cursor < rpos do
+ 		count += 1
+ 		cursor += ns[cursor].u8len
+ 	end
+ 	return count
+ end
+
+ # Length of the complete rope in bytes
+ redef var bytelen = 0
- # Length of the mutable part
+ # Length of the mutable part (in bytes)
#
# Is also used as base to compute the size of the next
# mutable native string (`ns`)
# Builds an empty `RopeBuffer`
init do
- str = ""
ns = new NativeString(maxlen)
buf_size = maxlen
dumped = 0
self.str = str
ns = new NativeString(maxlen)
buf_size = maxlen
- length = str.length
+ bytelen = str.length
dumped = 0
end
written = false
end
+ # Char at index `i`
+ #
+ # Indexes below `str.length` are served by `str`; the others are looked
+ # up in `ns`, seeking from byte `dumped` (taken as char 0 of that part).
+ redef fun [](i) do
+ 	var persisted = str.length
+ 	if i < persisted then return str[i]
+ 	var byte_pos = ns.char_to_byte_index_cached(i - persisted, 0, dumped)
+ 	return ns.char_at(byte_pos)
+ end
+
+ # Replaces the char at index `i` with `c` (appends when `i == length`)
+ #
+ # When `i` falls in `str`, the string is rebuilt around the new char.
+ # Otherwise the char is rewritten in `ns`; if the old and new chars do not
+ # have the same UTF-8 length, the bytes that follow are shifted, and the
+ # buffer is reallocated when there is not enough room left for the growth.
+ redef fun []=(i, c) do
+ 	assert i >= 0 and i <= length
+ 	# NOTE(review): there is no early return after `add`, so execution goes on
+ 	# and rewrites the freshly appended char in place; kept as in the original,
+ 	# confirm whether this is intended.
+ 	if i == length then add c
+ 	if i < str.length then
+ 		bytelen += c.u8char_len - str[i].u8char_len
+ 		var s = str
+ 		var l = s.substring(0, i)
+ 		var r = s.substring_from(i + 1)
+ 		str = l + c.to_s + r
+ 	else
+ 		var reali = i - str.length
+ 		# Byte position of the char to replace, and of the char right after it
+ 		var index = ns.char_to_byte_index_cached(reali, 0, dumped)
+ 		var st_nxt = ns.char_to_byte_index_cached(reali + 1, reali, index)
+ 		var loc_c = ns.char_at(index)
+ 		if loc_c.u8char_len != c.u8char_len then
+ 			var delta = c.u8char_len - loc_c.u8char_len
+ 			var remsp = buf_size - rpos
+ 			if remsp < delta then
+ 				# Not enough room: move the live part (`dumped` to `rpos`)
+ 				# to the start of a buffer twice as large
+ 				buf_size *= 2
+ 				var nns = new NativeString(buf_size)
+ 				var off = index - dumped
+ 				# Bytes located before the replaced char
+ 				ns.copy_to(nns, off, dumped, 0)
+ 				# Bytes located after the replaced char: they must land right
+ 				# after the room reserved for the new char (`c.u8char_len`
+ 				# bytes), otherwise `set_char_at` overwrites their beginning
+ 				ns.copy_to(nns, rpos - index - loc_c.u8char_len, index + loc_c.u8char_len, off + c.u8char_len)
+ 				ns = nns
+ 				# The new buffer starts at byte 0: rebase the bookkeeping
+ 				# along with `index` so later seeks from `dumped` stay valid
+ 				rpos -= dumped
+ 				dumped = 0
+ 				index = off
+ 			else
+ 				# Enough room: shift the tail in place by `delta` bytes
+ 				ns.copy_to(ns, rpos - st_nxt, st_nxt, st_nxt + delta)
+ 			end
+ 			bytelen += delta
+ 			rpos += delta
+ 		end
+ 		ns.set_char_at(index, c)
+ 	end
+ end
+
redef fun empty do return new RopeBuffer
redef fun clear do
str = ""
- length = 0
+ bytelen = 0
rpos = 0
dumped = 0
if written then
end
redef fun append(s) do
- var slen = s.length
- length += slen
- var rp = rpos
- if s isa Rope or slen > maxlen then
- if rp > 0 and dumped != rp then
- str += new FlatString.with_infos(ns, rp - dumped, dumped, rp - 1)
- dumped = rp
- end
- str = str + s
+ var slen = s.bytelen
+ if slen >= maxlen then
+ persist_buffer
+ str += s.to_s
return
end
- var remsp = buf_size - rp
- var sits: NativeString
- var begin: Int
- if s isa FlatString then
- begin = s.index_from
- sits = s.items
- else if s isa FlatBuffer then
- begin = 0
- sits = s.items
- else
+ if s isa FlatText then
+ var oits = s.items
+ var from = if s isa FlatString then s.first_byte else 0
+ var remsp = buf_size - rpos
if slen <= remsp then
- for i in s.bytes do
- ns[rpos] = i
- rpos += 1
- end
- else
- var spos = 0
- for i in [0..remsp[ do
- ns[rpos] = s.bytes[spos]
- rpos += 1
- spos += 1
- end
- dump_buffer
- while spos < slen do
- ns[rpos] = s.bytes[spos]
- spos += 1
- rpos += 1
- end
- end
- return
- end
- if slen <= remsp then
- if remsp <= 0 then
- dump_buffer
- rpos = 0
- else
- sits.copy_to(ns, slen, begin, rp)
+ oits.copy_to(ns, slen, from, rpos)
rpos += slen
+ return
end
- else
- sits.copy_to(ns, remsp, begin, rp)
- rpos = buf_size
+ var brk = oits.find_beginning_of_char_at(from + remsp)
+ oits.copy_to(ns, brk, from, rpos)
+ rpos += brk
dump_buffer
- var nlen = slen - remsp
- sits.copy_to(ns, nlen, begin + remsp, 0)
- rpos = nlen
+ oits.copy_to(ns, slen - remsp, brk, 0)
+ rpos = slen - remsp
+ else
+ for i in s.substrings do append i
end
end
# TODO: Fix when supporting UTF-8
ns[rp] = c.ascii.to_b
rp += 1
- length += 1
+ bytelen += 1
rpos = rp
end
end
ns[rp] = b
rp += 1
- length += 1
+ bytelen += 1
rpos = rp
end
ns = new NativeString(bs)
buf_size = bs
dumped = 0
+ rpos = 0
end
# Similar to dump_buffer, but does not reallocate a new NativeString
private fun persist_buffer do
+ if rpos == dumped then return
var nstr = new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
str += nstr
dumped = rpos
redef fun enlarge(i) do end
redef fun to_s do
- written = true
- var nnslen = rpos - dumped
- if nnslen == 0 then return str
- return str + new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
+ dump_buffer
+ return str
end
redef fun reverse do
redef fun +(o) do
var s = o.to_s
- var slen = s.length
- var mlen = length
+ var slen = s.bytelen
+ var mlen = bytelen
if slen == 0 then return self
if mlen == 0 then return s
var nlen = slen + mlen
if s isa FlatString then
if nlen > maxlen then return new Concat(self, s)
var mits = items
- var sifrom = s.index_from
- var mifrom = index_from
+ var sifrom = s.first_byte
+ var mifrom = first_byte
var sits = s.items
var ns = new NativeString(nlen + 1)
mits.copy_to(ns, mlen, mifrom, 0)
return ns.to_s_with_length(nlen)
else if s isa Concat then
var sl = s.left
- var sllen = sl.length
+ var sllen = sl.bytelen
if sllen + mlen > maxlen then return new Concat(self, s)
return new Concat(self + sl, s.right)
else
var subs: IndexedIterator[FlatString]
init(root: Concat) is old_style_init do
- pos = root.length - 1
+ pos = root.bytelen - 1
subs = new ReverseRopeSubstrings(root)
var s = subs.item
ns = s.items
- pns = s.index_to
+ pns = s.last_byte
end
init from(root: Concat, pos: Int) do
if not subs.is_ok then return
var s = subs.item
ns = s.items
- pns = s.index_to
+ pns = s.last_byte
end
end
redef fun next do
pns += 1
pos += 1
- if pns < subs.item.length then return
+ if pns < subs.item.bytelen then return
if not subs.is_ok then return
subs.next
if not subs.is_ok then return
redef type SELFTYPE: Concat
redef fun [](i) do
- var b: Int
var nod: String = target
loop
if nod isa FlatString then return nod.items[i]
redef type SELFTYPE: RopeBuffer
- redef fun [](i) do
- if i < target.str.length then
- return target.str[i]
- else
- # TODO: Fix when supporting UTF-8
- return target.ns[i - target.str.length].to_i.ascii
- end
- end
+ redef fun [](i) do return target[i]
- redef fun []=(i,c) do
- if i == target.length then target.add c
- if i < target.str.length then
- var s = target.str
- var l = s.substring(0, i)
- var r = s.substring_from(i + 1)
- target.str = l + c.to_s + r
- else
- # TODO: Fix when supporting UTF-8
- target.ns[i - target.str.length] = c.to_i.to_b
- end
- end
+ redef fun []=(i,c) do target[i] = c
redef fun add(c) do target.add c
# Init the iterator from a RopeBuffer.
init(t: RopeBuffer) is old_style_init do
ns = t.ns
- maxpos = t.rpos
+ maxpos = t.bytelen
sit = t.str.bytes.iterator
pns = t.dumped
index = 0
# Init the iterator from a RopeBuffer starting from `pos`.
init from(t: RopeBuffer, pos: Int) do
ns = t.ns
- maxpos = t.length
+ maxpos = t.bytelen
sit = t.str.bytes.iterator_from(pos)
pns = pos - t.str.length
index = pos
init(tgt: RopeBuffer) is old_style_init do
sit = tgt.str.bytes.reverse_iterator
pns = tgt.rpos - 1
- index = tgt.length - 1
+ index = tgt.bytelen - 1
ns = tgt.ns
end
# Init the iterator from a RopeBuffer starting from `pos`.
init from(tgt: RopeBuffer, pos: Int) do
- sit = tgt.str.bytes.reverse_iterator_from(pos - tgt.rpos - tgt.dumped)
- pns = pos - tgt.str.length
+ sit = tgt.str.bytes.reverse_iterator_from(pos - (tgt.rpos - tgt.dumped))
+ pns = pos - tgt.str.bytelen + tgt.rpos
index = pos
ns = tgt.ns
end
- redef fun is_ok do return index > 0
+ redef fun is_ok do return index >= 0
redef fun item do
if pns >= 0 then return ns[pns]
redef fun next do
index -= 1
- if pns >= 0 then
+ if pns > 0 then
pns -= 1
else
sit.next
if i < target.str.bytelen then
return target.str.bytes[i]
else
- return target.ns[i - target.str.length]
+ return target.ns[i - target.str.bytelen]
end
end
ans_buffer.add(msg.length.to_b)
end
if msg isa FlatString then
- ans_buffer.append_ns_from(msg.items, msg.length, msg.index_from)
+ ans_buffer.append_ns_from(msg.items, msg.length, msg.first_byte)
else
for i in msg.substrings do
- ans_buffer.append_ns_from(i.as(FlatString).items, i.length, i.as(FlatString).index_from)
+ ans_buffer.append_ns_from(i.as(FlatString).items, i.length, i.as(FlatString).first_byte)
end
end
return ans_buffer
fun char_instance(value: Char): RuntimeVariable
do
var t = mmodule.char_type
- var res = new RuntimeVariable("'{value.to_s.escape_to_c}'", t, t)
- return res
+
+ if value.ascii < 128 then
+ return new RuntimeVariable("'{value.to_s.escape_to_c}'", t, t)
+ else
+ return new RuntimeVariable("{value.ascii}", t, t)
+ end
end
# Generate a float value
var native_mtype = mmodule.native_string_type
var nat = self.new_var(native_mtype)
self.add("{nat} = \"{string.escape_to_c}\";")
- var length = self.int_instance(string.length)
+ var length = self.int_instance(string.bytelen)
self.add("{res} = {self.send(self.get_property("to_s_with_length", native_mtype), [nat, length]).as(not null)};")
self.add("{name} = {res};")
self.add("\}")
return true
end
else if cname == "Char" then
- if pname == "output" then
- v.add("printf(\"%c\", ((unsigned char){arguments.first}));")
- return true
- else if pname == "object_id" then
+ if pname == "object_id" then
v.ret(v.new_expr("(long){arguments.first}", ret.as(not null)))
return true
else if pname == "successor" then
# Return a new native string initialized with `txt`
fun native_string_instance(txt: String): Instance
do
- var instance = native_string_instance_len(txt.length+1)
+ var instance = native_string_instance_len(txt.bytelen+1)
var val = instance.val
- val[txt.length] = 0u8
- txt.to_cstring.copy_to(val, txt.length, 0, 0)
+ val[txt.bytelen] = 0u8
+ txt.to_cstring.copy_to(val, txt.bytelen, 0, 0)
return instance
end
fun string_instance(txt: String): Instance
do
var nat = native_string_instance(txt)
- var res = self.send(self.force_get_primitive_method("to_s_with_length", nat.mtype), [nat, self.int_instance(txt.length)])
+ var res = self.send(self.force_get_primitive_method("to_s_with_length", nat.mtype), [nat, self.int_instance(txt.bytelen)])
assert res != null
return res
end
if sp >= string_len then
dfa_state = -1
else
+ # Workaround required by the way SableCC generates its tables.
+ # Due to the 0xFFFF limit of a Java char, the lexer crashes when a big
+ # Nit char (i.e. code point > 65535) is read.
+ #
+ # Hence, if a char has a code point <= 255 (ISO 8859 range), it is left as is.
+ # Otherwise, it is replaced by 255.
+ # This does not corrupt the lexer and works on any character.
+ #
+ # In short: the generated tables are bound by the size of a Java char,
+ # so larger code points are clamped before the table lookup.
var c = string[sp].ascii
+ if c >= 256 then c = 255
sp += 1
var cr = _cr
s = "Je dis «{s}» et redis «{s}» et trois fois de plus : «{s}{s}{s}».\n"
i = i + 1
end
-print(s.length)
+print s.bytelen
s = ["Je dis «", s, "» et redis «", s, "» et trois fois de plus : «", s, s, s, "».\n"].plain_to_s
i = i + 1
end
-print(s.length)
+print(s.bytelen)
input
first_letter_last_letter
fibonacci_word
+shootout_nsieve
input
first_letter_last_letter
fibonacci_word
+shootout_nsieve
../lib/standard/kernel.nit:333,1--415,3: Error: `kernel#Float` does not specialize `module_0#Object`. Possible duplication of the root class `Object`?
../lib/standard/kernel.nit:417,1--519,3: Error: `kernel#Byte` does not specialize `module_0#Object`. Possible duplication of the root class `Object`?
../lib/standard/kernel.nit:521,1--712,3: Error: `kernel#Int` does not specialize `module_0#Object`. Possible duplication of the root class `Object`?
-../lib/standard/kernel.nit:714,1--854,3: Error: `kernel#Char` does not specialize `module_0#Object`. Possible duplication of the root class `Object`?
-../lib/standard/kernel.nit:856,1--863,3: Error: `kernel#Pointer` does not specialize `module_0#Object`. Possible duplication of the root class `Object`?
+../lib/standard/kernel.nit:714,1--867,3: Error: `kernel#Char` does not specialize `module_0#Object`. Possible duplication of the root class `Object`?
+../lib/standard/kernel.nit:869,1--876,3: Error: `kernel#Pointer` does not specialize `module_0#Object`. Possible duplication of the root class `Object`?
Montpellier
-*** Entrepôt Lunel ***
-L'entrepôt est vide
+*** Entrep�t Lunel ***
+L'entrep�t est vide
Carotte:15
Carotte:20
-* Rayon : Légumes
+* Rayon : L�gumes
-* Rayon : Légumes
+* Rayon : L�gumes
Carotte:15
Navet:10
Chou:3
-* Rayon : Légumes
+* Rayon : L�gumes
Carotte:15
Navet:10
Chou:13
Courge:1
-*** Entrepôt Lunel ***
-* Rayon : Légumes
+*** Entrep�t Lunel ***
+* Rayon : L�gumes
Carotte:15
Navet:10
Chou:13
Courge:1
-* Rayon Réfrigéré : Surgelés - t° max : -5
+* Rayon R�frig�r� : Surgel�s - t� max : -5
Pizza:12
-Poisson pané:4
+Poisson pan�:4
--- /dev/null
+Char 0 = 𐏓
+Char 1 = A
+Char 2 = A
+Char 3 = A
+Char 4 = A
+Char 5 = A
+Char 6 = A
+Char 7 = A
+Char 8 = A
+Char 9 = A
+Char 10 = A
+Char 11 = A
+Char 12 = A
+Char 13 = A
+Char 14 = A
+Char 15 = A
+Char 16 = A
+Char 17 = A
+Char 18 = A
+Char 19 = A
+Char 20 = A
+Char 21 = A
+Char 22 = A
+Char 23 = A
+Char 24 = A
+Char 25 = A
+Char 26 = A
+Char 27 = A
+Char 28 = A
+Char 29 = A
+Char 30 = A
+Char 31 = A
+Char 32 = Z
+Char 33 = A
+Char 34 = A
+Char 35 = A
+Char 36 = A
+Char 37 = A
+Char 38 = A
+Char 39 = A
+Char 40 = A
+Char 41 = A
+Char 42 = A
+Char 43 = A
+Char 44 = A
+Char 45 = A
+Char 46 = A
+Char 47 = A
+Char 48 = A
+Char 49 = A
+Char 50 = A
+Char 51 = A
+Char 52 = A
+Char 53 = A
+Char 54 = A
+Char 55 = A
+Char 56 = A
+Char 57 = A
+Char 58 = A
+Char 59 = A
+Char 60 = A
+Char 61 = A
+Char 62 = A
+Char 63 = あ
+Byte 0 = 0xf0
+Byte 1 = 0x90
+Byte 2 = 0x8f
+Byte 3 = 0x93
+Byte 4 = 0x41
+Byte 5 = 0x41
+Byte 6 = 0x41
+Byte 7 = 0x41
+Byte 8 = 0x41
+Byte 9 = 0x41
+Byte 10 = 0x41
+Byte 11 = 0x41
+Byte 12 = 0x41
+Byte 13 = 0x41
+Byte 14 = 0x41
+Byte 15 = 0x41
+Byte 16 = 0x41
+Byte 17 = 0x41
+Byte 18 = 0x41
+Byte 19 = 0x41
+Byte 20 = 0x41
+Byte 21 = 0x41
+Byte 22 = 0x41
+Byte 23 = 0x41
+Byte 24 = 0x41
+Byte 25 = 0x41
+Byte 26 = 0x41
+Byte 27 = 0x41
+Byte 28 = 0x41
+Byte 29 = 0x41
+Byte 30 = 0x41
+Byte 31 = 0x41
+Byte 32 = 0x41
+Byte 33 = 0x41
+Byte 34 = 0x41
+Byte 35 = 0x5a
+Byte 36 = 0x41
+Byte 37 = 0x41
+Byte 38 = 0x41
+Byte 39 = 0x41
+Byte 40 = 0x41
+Byte 41 = 0x41
+Byte 42 = 0x41
+Byte 43 = 0x41
+Byte 44 = 0x41
+Byte 45 = 0x41
+Byte 46 = 0x41
+Byte 47 = 0x41
+Byte 48 = 0x41
+Byte 49 = 0x41
+Byte 50 = 0x41
+Byte 51 = 0x41
+Byte 52 = 0x41
+Byte 53 = 0x41
+Byte 54 = 0x41
+Byte 55 = 0x41
+Byte 56 = 0x41
+Byte 57 = 0x41
+Byte 58 = 0x41
+Byte 59 = 0x41
+Byte 60 = 0x41
+Byte 61 = 0x41
+Byte 62 = 0x41
+Byte 63 = 0x41
+Byte 64 = 0x41
+Byte 65 = 0x41
+Byte 66 = 0xe3
+Byte 67 = 0x81
+Byte 68 = 0x82
+Char 63 = あ
+Char 62 = A
+Char 61 = A
+Char 60 = A
+Char 59 = A
+Char 58 = A
+Char 57 = A
+Char 56 = A
+Char 55 = A
+Char 54 = A
+Char 53 = A
+Char 52 = A
+Char 51 = A
+Char 50 = A
+Char 49 = A
+Char 48 = A
+Char 47 = A
+Char 46 = A
+Char 45 = A
+Char 44 = A
+Char 43 = A
+Char 42 = A
+Char 41 = A
+Char 40 = A
+Char 39 = A
+Char 38 = A
+Char 37 = A
+Char 36 = A
+Char 35 = A
+Char 34 = A
+Char 33 = A
+Char 32 = Z
+Char 31 = A
+Char 30 = A
+Char 29 = A
+Char 28 = A
+Char 27 = A
+Char 26 = A
+Char 25 = A
+Char 24 = A
+Char 23 = A
+Char 22 = A
+Char 21 = A
+Char 20 = A
+Char 19 = A
+Char 18 = A
+Char 17 = A
+Char 16 = A
+Char 15 = A
+Char 14 = A
+Char 13 = A
+Char 12 = A
+Char 11 = A
+Char 10 = A
+Char 9 = A
+Char 8 = A
+Char 7 = A
+Char 6 = A
+Char 5 = A
+Char 4 = A
+Char 3 = A
+Char 2 = A
+Char 1 = A
+Char 0 = 𐏓
+Byte 68 = 0x82
+Byte 67 = 0x81
+Byte 66 = 0xe3
+Byte 65 = 0x41
+Byte 64 = 0x41
+Byte 63 = 0x41
+Byte 62 = 0x41
+Byte 61 = 0x41
+Byte 60 = 0x41
+Byte 59 = 0x41
+Byte 58 = 0x41
+Byte 57 = 0x41
+Byte 56 = 0x41
+Byte 55 = 0x41
+Byte 54 = 0x41
+Byte 53 = 0x41
+Byte 52 = 0x41
+Byte 51 = 0x41
+Byte 50 = 0x41
+Byte 49 = 0x41
+Byte 48 = 0x41
+Byte 47 = 0x41
+Byte 46 = 0x41
+Byte 45 = 0x41
+Byte 44 = 0x41
+Byte 43 = 0x41
+Byte 42 = 0x41
+Byte 41 = 0x41
+Byte 40 = 0x41
+Byte 39 = 0x41
+Byte 38 = 0x41
+Byte 37 = 0x41
+Byte 36 = 0x41
+Byte 35 = 0x5a
+Byte 34 = 0x41
+Byte 33 = 0x41
+Byte 32 = 0x41
+Byte 31 = 0x41
+Byte 30 = 0x41
+Byte 29 = 0x41
+Byte 28 = 0x41
+Byte 27 = 0x41
+Byte 26 = 0x41
+Byte 25 = 0x41
+Byte 24 = 0x41
+Byte 23 = 0x41
+Byte 22 = 0x41
+Byte 21 = 0x41
+Byte 20 = 0x41
+Byte 19 = 0x41
+Byte 18 = 0x41
+Byte 17 = 0x41
+Byte 16 = 0x41
+Byte 15 = 0x41
+Byte 14 = 0x41
+Byte 13 = 0x41
+Byte 12 = 0x41
+Byte 11 = 0x41
+Byte 10 = 0x41
+Byte 9 = 0x41
+Byte 8 = 0x41
+Byte 7 = 0x41
+Byte 6 = 0x41
+Byte 5 = 0x41
+Byte 4 = 0x41
+Byte 3 = 0x93
+Byte 2 = 0x8f
+Byte 1 = 0x90
+Byte 0 = 0xf0
--- /dev/null
+Char 0 = 𐏓
+Char 1 = A
+Char 2 = A
+Char 3 = A
+Char 4 = A
+Char 5 = A
+Char 6 = A
+Char 7 = A
+Char 8 = A
+Char 9 = A
+Char 10 = A
+Char 11 = A
+Char 12 = A
+Char 13 = A
+Char 14 = A
+Char 15 = A
+Char 16 = A
+Char 17 = A
+Char 18 = A
+Char 19 = A
+Char 20 = A
+Char 21 = A
+Char 22 = A
+Char 23 = A
+Char 24 = A
+Char 25 = A
+Char 26 = A
+Char 27 = A
+Char 28 = A
+Char 29 = A
+Char 30 = A
+Char 31 = A
+Char 32 = Z
+Char 33 = A
+Char 34 = A
+Char 35 = A
+Char 36 = A
+Char 37 = A
+Char 38 = A
+Char 39 = A
+Char 40 = A
+Char 41 = A
+Char 42 = A
+Char 43 = A
+Char 44 = A
+Char 45 = A
+Char 46 = A
+Char 47 = A
+Char 48 = A
+Char 49 = A
+Char 50 = A
+Char 51 = A
+Char 52 = A
+Char 53 = A
+Char 54 = A
+Char 55 = A
+Char 56 = A
+Char 57 = A
+Char 58 = A
+Char 59 = A
+Char 60 = A
+Char 61 = A
+Char 62 = A
+Char 63 = あ
+Byte 0 = 0xf0
+Byte 1 = 0x90
+Byte 2 = 0x8f
+Byte 3 = 0x93
+Byte 4 = 0x41
+Byte 5 = 0x41
+Byte 6 = 0x41
+Byte 7 = 0x41
+Byte 8 = 0x41
+Byte 9 = 0x41
+Byte 10 = 0x41
+Byte 11 = 0x41
+Byte 12 = 0x41
+Byte 13 = 0x41
+Byte 14 = 0x41
+Byte 15 = 0x41
+Byte 16 = 0x41
+Byte 17 = 0x41
+Byte 18 = 0x41
+Byte 19 = 0x41
+Byte 20 = 0x41
+Byte 21 = 0x41
+Byte 22 = 0x41
+Byte 23 = 0x41
+Byte 24 = 0x41
+Byte 25 = 0x41
+Byte 26 = 0x41
+Byte 27 = 0x41
+Byte 28 = 0x41
+Byte 29 = 0x41
+Byte 30 = 0x41
+Byte 31 = 0x41
+Byte 32 = 0x41
+Byte 33 = 0x41
+Byte 34 = 0x41
+Byte 35 = 0x5a
+Byte 36 = 0x41
+Byte 37 = 0x41
+Byte 38 = 0x41
+Byte 39 = 0x41
+Byte 40 = 0x41
+Byte 41 = 0x41
+Byte 42 = 0x41
+Byte 43 = 0x41
+Byte 44 = 0x41
+Byte 45 = 0x41
+Byte 46 = 0x41
+Byte 47 = 0x41
+Byte 48 = 0x41
+Byte 49 = 0x41
+Byte 50 = 0x41
+Byte 51 = 0x41
+Byte 52 = 0x41
+Byte 53 = 0x41
+Byte 54 = 0x41
+Byte 55 = 0x41
+Byte 56 = 0x41
+Byte 57 = 0x41
+Byte 58 = 0x41
+Byte 59 = 0x41
+Byte 60 = 0x41
+Byte 61 = 0x41
+Byte 62 = 0x41
+Byte 63 = 0x41
+Byte 64 = 0x41
+Byte 65 = 0x41
+Byte 66 = 0xe3
+Byte 67 = 0x81
+Byte 68 = 0x82
+Char 63 = あ
+Char 62 = A
+Char 61 = A
+Char 60 = A
+Char 59 = A
+Char 58 = A
+Char 57 = A
+Char 56 = A
+Char 55 = A
+Char 54 = A
+Char 53 = A
+Char 52 = A
+Char 51 = A
+Char 50 = A
+Char 49 = A
+Char 48 = A
+Char 47 = A
+Char 46 = A
+Char 45 = A
+Char 44 = A
+Char 43 = A
+Char 42 = A
+Char 41 = A
+Char 40 = A
+Char 39 = A
+Char 38 = A
+Char 37 = A
+Char 36 = A
+Char 35 = A
+Char 34 = A
+Char 33 = A
+Char 32 = Z
+Char 31 = A
+Char 30 = A
+Char 29 = A
+Char 28 = A
+Char 27 = A
+Char 26 = A
+Char 25 = A
+Char 24 = A
+Char 23 = A
+Char 22 = A
+Char 21 = A
+Char 20 = A
+Char 19 = A
+Char 18 = A
+Char 17 = A
+Char 16 = A
+Char 15 = A
+Char 14 = A
+Char 13 = A
+Char 12 = A
+Char 11 = A
+Char 10 = A
+Char 9 = A
+Char 8 = A
+Char 7 = A
+Char 6 = A
+Char 5 = A
+Char 4 = A
+Char 3 = A
+Char 2 = A
+Char 1 = A
+Char 0 = 𐏓
+Byte 68 = 0x82
+Byte 67 = 0x81
+Byte 66 = 0xe3
+Byte 65 = 0x41
+Byte 64 = 0x41
+Byte 63 = 0x41
+Byte 62 = 0x41
+Byte 61 = 0x41
+Byte 60 = 0x41
+Byte 59 = 0x41
+Byte 58 = 0x41
+Byte 57 = 0x41
+Byte 56 = 0x41
+Byte 55 = 0x41
+Byte 54 = 0x41
+Byte 53 = 0x41
+Byte 52 = 0x41
+Byte 51 = 0x41
+Byte 50 = 0x41
+Byte 49 = 0x41
+Byte 48 = 0x41
+Byte 47 = 0x41
+Byte 46 = 0x41
+Byte 45 = 0x41
+Byte 44 = 0x41
+Byte 43 = 0x41
+Byte 42 = 0x41
+Byte 41 = 0x41
+Byte 40 = 0x41
+Byte 39 = 0x41
+Byte 38 = 0x41
+Byte 37 = 0x41
+Byte 36 = 0x41
+Byte 35 = 0x5a
+Byte 34 = 0x41
+Byte 33 = 0x41
+Byte 32 = 0x41
+Byte 31 = 0x41
+Byte 30 = 0x41
+Byte 29 = 0x41
+Byte 28 = 0x41
+Byte 27 = 0x41
+Byte 26 = 0x41
+Byte 25 = 0x41
+Byte 24 = 0x41
+Byte 23 = 0x41
+Byte 22 = 0x41
+Byte 21 = 0x41
+Byte 20 = 0x41
+Byte 19 = 0x41
+Byte 18 = 0x41
+Byte 17 = 0x41
+Byte 16 = 0x41
+Byte 15 = 0x41
+Byte 14 = 0x41
+Byte 13 = 0x41
+Byte 12 = 0x41
+Byte 11 = 0x41
+Byte 10 = 0x41
+Byte 9 = 0x41
+Byte 8 = 0x41
+Byte 7 = 0x41
+Byte 6 = 0x41
+Byte 5 = 0x41
+Byte 4 = 0x41
+Byte 3 = 0x93
+Byte 2 = 0x8f
+Byte 1 = 0x90
+Byte 0 = 0xf0
Read 4 chars: exis
Read 4 chars: Laf
Read 4 chars: erri
-Read 4 chars: ère
+Read 3 chars: ère
Read 4 chars: <al
Read 4 chars: exis
Read 4 chars: .laf
-4
-0 is à (195)
-1 is © (169)
-2 is à (195)
-3 is ¨ (168)
+2
+0 is é (233)
+1 is è (232)
var w = args.first.to_i
var h = w
-var byte_acc = 0
+var byte_acc = 0u8
var bit_num = 0
print("P4\n{w} {h}")
if zr*zr+zi*zi > limit*limit then
byte_acc = (byte_acc.lshift(1))
else
- byte_acc = (byte_acc.lshift(1)) + 1
+ byte_acc = (byte_acc.lshift(1)) + 1u8
end
bit_num = bit_num + 1
if bit_num == 8 then
- printn(byte_acc.ascii)
- byte_acc = 0
+ stdout.write_byte(byte_acc)
+ byte_acc = 0u8
bit_num = 0
else if x == w - 1 then
byte_acc = byte_acc.lshift(8-w%8)
- printn(byte_acc.ascii)
- byte_acc = 0
+ stdout.write_byte(byte_acc)
+ byte_acc = 0u8
bit_num = 0
end
end
--- /dev/null
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Buffer under test; the commented-out alternative below swaps in a `FlatBuffer`
+var fb: Buffer = new RopeBuffer
+#alt1 fb = new FlatBuffer
+
+# Fill the buffer with 64 single-byte chars
+for i in [0 .. 64[ do fb.add 'A'
+
+# In-place replacements mixing chars of different UTF-8 lengths
+fb[63] = 'あ'
+fb[32] = 'き'
+fb[0] = '𐏓'
+fb[32] = 'Z'
+
+# Forward iteration over chars
+var pos = 0
+for c in fb.chars do
+	print "Char {pos} = {c}"
+	pos += 1
+end
+
+# Forward iteration over bytes
+pos = 0
+for b in fb.bytes do
+	print "Byte {pos} = {b}"
+	pos += 1
+end
+
+# Backward iteration over chars, counting down from the last char index
+pos = fb.length - 1
+for c in fb.chars.reverse_iterator do
+	print "Char {pos} = {c}"
+	pos -= 1
+end
+
+# Backward iteration over bytes, counting down from the last byte index
+pos = fb.bytelen - 1
+for b in fb.bytes.reverse_iterator do
+	print "Byte {pos} = {b}"
+	pos -= 1
+end