From 024784644e5918c9cfb1f61d82d7ab0830b05462 Mon Sep 17 00:00:00 2001 From: Lucas Bajolet Date: Tue, 21 Jul 2015 13:46:08 -0400 Subject: [PATCH] lib/standard: Update libs for the support of UTF-8 Signed-off-by: Lucas Bajolet --- contrib/pep8analysis/src/parser/lexer.nit | 1 + lib/buffered_ropes.nit | 19 +- lib/ropes_debug.nit | 6 +- lib/standard/file.nit | 4 +- lib/standard/stream.nit | 4 +- lib/standard/text/abstract_text.nit | 20 +- lib/standard/text/flat.nit | 403 +++++++++++++++++------------ lib/standard/text/ropes.nit | 152 +++++------ lib/websocket/websocket.nit | 4 +- 9 files changed, 338 insertions(+), 275 deletions(-) diff --git a/contrib/pep8analysis/src/parser/lexer.nit b/contrib/pep8analysis/src/parser/lexer.nit index 96f5430..912336c 100644 --- a/contrib/pep8analysis/src/parser/lexer.nit +++ b/contrib/pep8analysis/src/parser/lexer.nit @@ -338,6 +338,7 @@ class Lexer dfa_state = -1 else var c = string[sp].ascii + if c >= 255 then c = 255 sp += 1 var cr = _cr diff --git a/lib/buffered_ropes.nit b/lib/buffered_ropes.nit index a2a8023..1f92337 100644 --- a/lib/buffered_ropes.nit +++ b/lib/buffered_ropes.nit @@ -94,6 +94,9 @@ private class Leaf var bns: NativeString is noinit redef var length is noinit + # Unsafe, but since it is an experiment, don't mind + redef fun bytelen do return length + redef fun empty do return new Leaf(new ManualBuffer) redef fun to_cstring do @@ -163,14 +166,14 @@ private class Leaf var bpos = buf.pos var sits = s.items if bpos == mlen then - sits.copy_to(buf.ns, slen, s.index_from, bpos) + sits.copy_to(buf.ns, slen, s.first_byte, bpos) buf.pos = bpos + slen return new Leaf(buf) else var b = new ManualBuffer var nbns = b.ns bns.copy_to(nbns, mlen, 0, 0) - sits.copy_to(nbns, slen, s.index_from, mlen) + sits.copy_to(nbns, slen, s.first_byte, mlen) b.pos = nlen return new Leaf(b) end @@ -219,7 +222,7 @@ redef class Concat for i in substrings do var ilen = i.length if i isa FlatString then - i.items.copy_to(ns, ilen, i.index_from, off) + i.items.copy_to(ns, ilen, i.first_byte, off) else if i isa Leaf then i.buf.ns.copy_to(ns, ilen, 0, off) else @@ -261,8 +264,8 @@ redef class FlatString if s isa FlatString then if slen + mlen > maxlen then return new Concat(self, s) var mits = items - var sifrom = s.index_from - var mifrom = index_from + var sifrom = s.first_byte + var mifrom = first_byte var sits = s.items var b = new ManualBuffer var bns = b.ns @@ -277,7 +280,7 @@ redef class FlatString return new Concat(sl + self, s.right) else if s isa Leaf then if slen + mlen > maxlen then return new Concat(self, s) - var mifrom = index_from + var mifrom = first_byte var sb = s.buf var b = new ManualBuffer var bns = b.ns @@ -323,13 +326,13 @@ redef class Array[E] var tmp = na[i] var tpl = tmp.length if tmp isa FlatString then - tmp.items.copy_to(ns, tpl, tmp.index_from, off) + tmp.items.copy_to(ns, tpl, tmp.first_byte, off) off += tpl else for j in tmp.substrings do var slen = j.length if j isa FlatString then - j.items.copy_to(ns, slen, j.index_from, off) + j.items.copy_to(ns, slen, j.first_byte, off) else if j isa Leaf then j.buf.ns.copy_to(ns, slen, 0, off) end diff --git a/lib/ropes_debug.nit b/lib/ropes_debug.nit index a87e8d4..8482352 100644 --- a/lib/ropes_debug.nit +++ b/lib/ropes_debug.nit @@ -45,7 +45,7 @@ redef class RopeBuffer s += "n{object_id} -> n{str.object_id} [label = \"str\"];\n" s += str.internal_to_dot s += "n{object_id} -> n{ns.object_id} [label = \"ns\"];\n" - s += "n{ns.object_id}[label = \"NativeString\", content=\"{ns.to_s_with_length(rpos)}\"];\n" + s += "n{ns.object_id}[label = \"Items\", content=\"{ns}\"];\n" return s end end @@ -53,14 +53,14 @@ end redef class FlatString redef fun internal_to_dot: String do - return "n{object_id} [label=\"FlatString\\nindex_from = {index_from}\\nindex_to = {index_to}\\nNativeString = {items.to_s_with_length(items.cstring_length)}\"];\n" + return "n{object_id} [label=\"FlatString\\nlength = {length}\\nbytelen = {bytelen}\\nfirst_byte = {first_byte}\\nlast_byte = {last_byte}\\nText = {self.escape_to_dot}\"];\n" end end redef class FlatBuffer redef fun internal_to_dot: String do - return "n{object_id} [label=\"FlatBuffer\\length = {length}\\ncapacity = {capacity}\\nitems = {items.to_s_with_length(items.cstring_length)}\"];\n" + return "n{object_id} [label=\"FlatBuffer\\nbytelen = {bytelen}\\nlength = {length}\\ncapacity = {capacity}\\nText = {escape_to_dot}\"];\n" end end diff --git a/lib/standard/file.nit b/lib/standard/file.nit index 4fdafb1..f28f522 100644 --- a/lib/standard/file.nit +++ b/lib/standard/file.nit @@ -679,7 +679,7 @@ redef class Text private fun write_native_to(s: FileWriter) do - for i in substrings do s.write_native(i.to_cstring, 0, i.length) + for i in substrings do s.write_native(i.to_cstring, 0, i.bytelen) end end @@ -1099,7 +1099,7 @@ end redef class FlatString redef fun write_native_to(s) do - s.write_native(items, index_from, length) + s.write_native(items, first_byte, bytelen) end end diff --git a/lib/standard/stream.nit b/lib/standard/stream.nit index dd1875b..7218514 100644 --- a/lib/standard/stream.nit +++ b/lib/standard/stream.nit @@ -527,7 +527,7 @@ abstract class BufferedReader # if there is something to append if i > _buffer_pos then # Enlarge the string (if needed) - s.enlarge(s.length + i - _buffer_pos) + s.enlarge(s.bytelen + i - _buffer_pos) # Copy from the buffer to the string var j = _buffer_pos @@ -664,5 +664,5 @@ class StringReader return new Bytes(nns, nslen, nslen) end - redef fun eof do return cursor >= source.length + redef fun eof do return cursor >= source.bytelen end diff --git a/lib/standard/text/abstract_text.nit b/lib/standard/text/abstract_text.nit index cad7522..97191d2 100644 --- a/lib/standard/text/abstract_text.nit +++ b/lib/standard/text/abstract_text.nit @@ -45,12 +45,14 @@ abstract class Text # # assert "12345".length == 5 # assert "".length == 0 + # assert "あいうえお".length == 5 fun length: Int is abstract # Number of bytes in `self` # - # TODO: Implement correctly once UTF-8 is supported - fun bytelen: Int do return length + # assert "12345".bytelen == 5 + # assert "あいうえお".bytelen == 15 + fun bytelen: Int is abstract # Create a substring. # @@ -58,6 +60,7 @@ abstract class Text # assert "abcd".substring(-1, 2) == "a" # assert "abcd".substring(1, 0) == "" # assert "abcd".substring(2, 5) == "cd" + # assert "あいうえお".substring(1,3) == "いうえ" # # A `from` index < 0 will be replaced by 0. # Unless a `count` value is > 0 at the same time. @@ -934,7 +937,7 @@ abstract class FlatText # Real items, used as cache for to_cstring is called private var real_items: nullable NativeString = null - # Returns a char* starting at position `index_from` + # Returns a char* starting at position `first_byte` # # WARNING: If you choose to use this service, be careful of the following. # @@ -953,6 +956,8 @@ abstract class FlatText redef var length = 0 + redef var bytelen = 0 + redef fun output do var i = 0 @@ -1000,7 +1005,7 @@ private abstract class StringByteView redef fun iterator do return self.iterator_from(0) - redef fun reverse_iterator do return self.reverse_iterator_from(self.length - 1) + redef fun reverse_iterator do return self.reverse_iterator_from(target.bytelen - 1) end # Immutable sequence of characters. @@ -1329,7 +1334,6 @@ private abstract class BufferByteView super Sequence[Byte] redef type SELFTYPE: Buffer - end redef class Object @@ -1574,6 +1578,8 @@ redef class Char # assert '9'.is_numeric # assert not 'a'.is_numeric # assert not '?'.is_numeric + # + # FIXME: Works on ASCII-range only fun is_numeric: Bool do return self >= '0' and self <= '9' @@ -1585,6 +1591,8 @@ redef class Char # assert 'Z'.is_alpha # assert not '0'.is_alpha # assert not '?'.is_alpha + # + # FIXME: Works on ASCII-range only fun is_alpha: Bool do return (self >= 'a' and self <= 'z') or (self >= 'A' and self <= 'Z') @@ -1597,6 +1605,8 @@ redef class Char # assert '0'.is_alphanumeric # assert '9'.is_alphanumeric # assert not '?'.is_alphanumeric + # + # FIXME: Works on ASCII-range only fun is_alphanumeric: Bool do return self.is_numeric or self.is_alpha diff --git a/lib/standard/text/flat.nit b/lib/standard/text/flat.nit index 66e7734..26c1a90 100644 --- a/lib/standard/text/flat.nit +++ b/lib/standard/text/flat.nit @@ -12,6 +12,7 @@ module flat intrude import abstract_text +intrude import native `{ #include @@ -38,28 +39,73 @@ class FlatString super FlatText super String - # Index in _items of the start of the string - private var index_from: Int is noinit + # Index at which `self` begins in `items`, inclusively + private var first_byte: Int is noinit - # Indes in _items of the last item of the string - private var index_to: Int is noinit + # Index at which `self` ends in `items`, inclusively + private var last_byte: Int is noinit redef var chars = new FlatStringCharView(self) is lazy redef var bytes = new FlatStringByteView(self) is lazy - redef fun [](index) - do - # Check that the index (+ index_from) is not larger than indexTo - # In other terms, if the index is valid - assert index >= 0 - assert (index + index_from) <= index_to - return items[index + index_from].to_i.ascii + # Cache of the latest position (char) explored in the string + var position: Int = 0 + # Cached position (bytes) in the NativeString underlying the String + var bytepos: Int = first_byte is lateinit + + redef var length is lazy do + if bytelen == 0 then return 0 + var st = first_byte + var its = items + var ln = 0 + var lst = last_byte + while st <= lst do + st += its.length_of_char_at(st) + ln += 1 + end + return ln end - ################################################ - # AbstractString specific methods # - ################################################ + redef fun [](index) do return items.char_at(char_to_byte_index(index)) + + # Index of the character `index` in `items` + private fun char_to_byte_index(index: Int): Int do + var ln = length + assert index >= 0 + assert index < ln + + # Find best insertion point + var delta_begin = index + var delta_end = (ln - 1) - index + var delta_cache = (position - index).abs + var min = delta_begin + var its = items + + if delta_cache < min then min = delta_cache + if delta_end < min then min = delta_end + + var ns_i: Int + var my_i: Int + + if min == delta_begin then + ns_i = first_byte + my_i = 0 + else if min == delta_cache then + ns_i = bytepos + my_i = position + else + ns_i = its.find_beginning_of_char_at(last_byte) + my_i = length - 1 + end + + ns_i = its.char_to_byte_index_cached(index, my_i, ns_i) + + position = index + bytepos = ns_i + + return ns_i + end redef fun reversed do @@ -72,7 +118,7 @@ class FlatString return s end - redef fun fast_cstring do return items.fast_cstring(index_from) + redef fun fast_cstring do return items.fast_cstring(first_byte) redef fun substring(from, count) do @@ -84,19 +130,16 @@ class FlatString from = 0 end - var new_from = index_from + from + if (count + from) > length then count = length - from + if count <= 0 then return "" + var end_index = from + count - 1 - if (new_from + count) > index_to then - var new_len = index_to - new_from + 1 - if new_len <= 0 then return empty - return new FlatString.with_infos(items, new_len, new_from, index_to) - end + var bytefrom = char_to_byte_index(from) + var byteto = char_to_byte_index(end_index) + byteto += items.length_of_char_at(byteto) - 1 - if count <= 0 then return empty - - var to = new_from + count - 1 - - return new FlatString.with_infos(items, to - new_from + 1, new_from, to) + var s = new FlatString.full(items, byteto - bytefrom + 1, bytefrom, byteto, count) + return s end redef fun empty do return "".as(FlatString) @@ -140,29 +183,38 @@ class FlatString # String Specific Methods # ################################################## - # Low-level creation of a new string with given data. + # Low-level creation of a new string with minimal data. # # `items` will be used as is, without copy, to retrieve the characters of the string. # Aliasing issues is the responsibility of the caller. - private init with_infos(items: NativeString, length: Int, from: Int, to: Int) + private init with_infos(items: NativeString, bytelen, from, to: Int) do self.items = items - self.length = length - index_from = from - index_to = to + self.bytelen = bytelen + first_byte = from + last_byte = to end - redef fun to_cstring + # Low-level creation of a new string with all the data. + # + # `items` will be used as is, without copy, to retrieve the characters of the string. + # Aliasing issues is the responsibility of the caller. + private init full(items: NativeString, bytelen, from, to, length: Int) do - if real_items != null then - return real_items.as(not null) - else - var newItems = new NativeString(length + 1) - self.items.copy_to(newItems, length, index_from, 0) - newItems[length] = 0u8 - self.real_items = newItems - return newItems - end + self.items = items + self.length = length + self.bytelen = bytelen + first_byte = from + last_byte = to + end + + redef fun to_cstring do + if real_items != null then return real_items.as(not null) + var new_items = new NativeString(bytelen + 1) + self.items.copy_to(new_items, bytelen, first_byte, 0) + new_items[bytelen] = 0u8 + real_items = new_items + return new_items end redef fun ==(other) @@ -171,12 +223,12 @@ class FlatString if self.object_id == other.object_id then return true - var my_length = length + var my_length = bytelen - if other.length != my_length then return false + if other.bytelen != my_length then return false - var my_index = index_from - var its_index = other.index_from + var my_index = first_byte + var its_index = other.first_byte var last_iteration = my_index + my_length @@ -198,33 +250,22 @@ class FlatString if self.object_id == other.object_id then return false - var my_curr_char : Char - var its_curr_char : Char - - var my_length = self.length - var its_length = other.length - var max + var my_length = self.bytelen + var its_length = other.bytelen - if my_length < its_length then - max = my_length - else - max = its_length - end + var max = if my_length < its_length then my_length else its_length - var my_chars = chars - var its_chars = other.chars + var myits = self.bytes + var itsits = other.bytes - var pos = 0 - while pos < max do - my_curr_char = my_chars[pos] - its_curr_char = its_chars[pos] + for i in [0 .. max[ do + var my_curr_char = myits[i] + var its_curr_char = itsits[i] if my_curr_char != its_curr_char then if my_curr_char < its_curr_char then return true return false end - - pos += 1 end return my_length < its_length @@ -243,7 +284,7 @@ class FlatString var ns = new NativeString(nlen + 1) mits.copy_to(ns, mlen, mifrom, 0) sits.copy_to(ns, slen, sifrom, mlen) - return ns.to_s_with_length(nlen) + return new FlatString.full(ns, nlen, 0, nlen - 1, length + o.length) else abort end @@ -265,16 +306,17 @@ class FlatString return new FlatString.full(ns, new_bytelen, 0, new_bytelen - 1, newlen) end + redef fun hash do if hash_cache == null then # djb2 hash algorithm var h = 5381 - var i = index_from + var i = first_byte var myitems = items - while i <= index_to do + while i <= last_byte do h = h.lshift(5) + h + myitems[i].to_i i += 1 end @@ -358,16 +400,16 @@ private class FlatStringByteReverseIterator init with_pos(tgt: FlatString, pos: Int) do - init(tgt, tgt.items, pos + tgt.index_from) + init(tgt, tgt.items, pos + tgt.first_byte) end - redef fun is_ok do return curr_pos >= target.index_from + redef fun is_ok do return curr_pos >= target.first_byte redef fun item do return target_items[curr_pos] redef fun next do curr_pos -= 1 - redef fun index do return curr_pos - target.index_from + redef fun index do return curr_pos - target.first_byte end @@ -382,16 +424,16 @@ private class FlatStringByteIterator init with_pos(tgt: FlatString, pos: Int) do - init(tgt, tgt.items, pos + tgt.index_from) + init(tgt, tgt.items, pos + tgt.first_byte) end - redef fun is_ok do return curr_pos <= target.index_to + redef fun is_ok do return curr_pos <= target.last_byte redef fun item do return target_items[curr_pos] redef fun next do curr_pos += 1 - redef fun index do return curr_pos - target.index_from + redef fun index do return curr_pos - target.first_byte end @@ -402,12 +444,12 @@ private class FlatStringByteView redef fun [](index) do - # Check that the index (+ index_from) is not larger than indexTo + # Check that the index (+ first_byte) is not larger than last_byte # In other terms, if the index is valid assert index >= 0 var target = self.target - assert (index + target.index_from) <= target.index_to - return target.items[index + target.index_from] + assert (index + target.first_byte) <= target.last_byte + return target.items[index + target.first_byte] end redef fun iterator_from(start) do return new FlatStringByteIterator.with_pos(target, start) @@ -431,7 +473,23 @@ class FlatBuffer redef var bytes: Sequence[Byte] = new FlatBufferByteView(self) is lazy - private var capacity: Int = 0 + redef var bytelen = 0 + + # O(n) + redef fun length do + var max = bytelen + if max == 0 then return 0 + var pos = 0 + var ln = 0 + var its = items + while pos < max do + pos += its.length_of_char_at(pos) + ln += 1 + end + return ln + end + + private var capacity = 0 redef fun fast_cstring do return items.fast_cstring(0) @@ -443,49 +501,84 @@ class FlatBuffer # the Copy-On-Write flag `written` is set at true. private fun reset do var nns = new NativeString(capacity) - items.copy_to(nns, length, 0, 0) + items.copy_to(nns, bytelen, 0, 0) items = nns written = false end - redef fun [](index) + # Shifts the content of the buffer by `len` bytes to the right, starting at byte `from` + # + # Internal only, does not modify bytelen or length, this is the caller's responsability + private fun rshift_bytes(from: Int, len: Int) do + var oit = items + var nit = items + if bytelen + len > capacity then + capacity = capacity * 2 + 2 + nit = new NativeString(capacity) + oit.copy_to(nit, 0, 0, from) + end + oit.copy_to(nit, bytelen - from, from, from + len) + end + + # Shifts the content of the buffer by `len` bytes to the left, starting at `from` + # + # Internal only, does not modify bytelen or length, this is the caller's responsability + private fun lshift_bytes(from: Int, len: Int) do + items.copy_to(items, bytelen - from, from, from - len) + end + + redef fun [](i) do - assert index >= 0 - assert index < length - return items[index].to_i.ascii + assert i < length and i >= 0 + return items.char_at(items.char_to_byte_index(i)) end redef fun []=(index, item) do + assert index >= 0 and index <= length + if written then reset is_dirty = true if index == length then - add(item) + add item return end - if written then reset - assert index >= 0 and index < length - items[index] = item.ascii.to_b + var ip = items.char_to_byte_index(index) + var c = items.char_at(ip) + var clen = c.u8char_len + var itemlen = item.u8char_len + var size_diff = itemlen - clen + if size_diff > 0 then + rshift_bytes(ip + clen, size_diff) + else if size_diff < 0 then + lshift_bytes(ip + clen, -size_diff) + end + bytelen += size_diff + items.set_char_at(ip, item) end redef fun add(c) do + if written then reset is_dirty = true - if capacity <= length then enlarge(length + 5) - items[length] = c.ascii.to_b - length += 1 + var clen = c.u8char_len + enlarge(bytelen + clen) + items.set_char_at(bytelen, c) + bytelen += clen end private fun add_byte(b: Byte) do + if written then reset is_dirty = true - if capacity <= length then enlarge(length + 5) + enlarge(bytelen + 1) items[bytelen] = b - length += 1 + # FIXME: Might trigger errors + bytelen += 1 end redef fun clear do is_dirty = true if written then reset - length = 0 + bytelen = 0 end redef fun empty do return new Buffer @@ -499,7 +592,7 @@ class FlatBuffer # it does a copy of the current `Buffer` written = false var a = new NativeString(c+1) - if length > 0 then items.copy_to(a, length, 0, 0) + if bytelen > 0 then items.copy_to(a, bytelen, 0, 0) items = a capacity = c end @@ -507,16 +600,16 @@ class FlatBuffer redef fun to_s do written = true - if length == 0 then items = new NativeString(1) - return new FlatString.with_infos(items, length, 0, length - 1) + if bytelen == 0 then items = new NativeString(1) + return new FlatString.with_infos(items, bytelen, 0, bytelen - 1) end redef fun to_cstring do if is_dirty then - var new_native = new NativeString(length + 1) - new_native[length] = 0u8 - if length > 0 then items.copy_to(new_native, length, 0, 0) + var new_native = new NativeString(bytelen + 1) + new_native[bytelen] = 0u8 + if length > 0 then items.copy_to(new_native, bytelen, 0, 0) real_items = new_native is_dirty = false end @@ -533,59 +626,51 @@ class FlatBuffer # # If `items` is shared, `written` should be set to true after the creation # so that a modification will do a copy-on-write. - private init with_infos(items: NativeString, capacity, length: Int) + private init with_infos(items: NativeString, capacity, bytelen: Int) do self.items = items - self.length = length self.capacity = capacity + self.bytelen = bytelen end # Create a new string copied from `s`. init from(s: Text) do - capacity = s.length + 1 - length = s.length - items = new NativeString(capacity) - if s isa FlatString then - s.items.copy_to(items, length, s.index_from, 0) - else if s isa FlatBuffer then - s.items.copy_to(items, length, 0, 0) + items = new NativeString(s.bytelen) + if s isa FlatText then + items = s.items else - var curr_pos = 0 - for i in s.bytes do - items[curr_pos] = i - curr_pos += 1 - end + for i in substrings do i.as(FlatString).items.copy_to(items, i.bytelen, 0, 0) end + bytelen = s.bytelen + capacity = s.bytelen + written = true end # Create a new empty string with a given capacity. init with_capacity(cap: Int) do assert cap >= 0 - items = new NativeString(cap+1) + items = new NativeString(cap + 1) capacity = cap - length = 0 + bytelen = 0 end redef fun append(s) do if s.is_empty then return is_dirty = true - var sl = s.length - if capacity < length + sl then enlarge(length + sl) + var sl = s.bytelen + enlarge(bytelen + sl) if s isa FlatString then - s.items.copy_to(items, sl, s.index_from, length) + s.items.copy_to(items, sl, s.first_byte, bytelen) else if s isa FlatBuffer then - s.items.copy_to(items, sl, 0, length) + s.items.copy_to(items, sl, 0, bytelen) else - var curr_pos = self.length - for i in s.bytes do - items[curr_pos] = i - curr_pos += 1 - end + for i in s.substrings do append i + return end - length += sl + bytelen += sl end # Copies the content of self in `dest` @@ -601,15 +686,16 @@ class FlatBuffer redef fun substring(from, count) do assert count >= 0 - count += from if from < 0 then from = 0 - if count > length then count = length - if from < count then - var len = count - from - var r_items = new NativeString(len) - items.copy_to(r_items, len, from, 0) - var r = new FlatBuffer.with_infos(r_items, len, len) - return r + if (from + count) > length then count = length - from + if count != 0 then + var bytefrom = items.char_to_byte_index(from) + var byteto = items.char_to_byte_index(count + from - 1) + byteto += items.char_at(byteto).u8char_len - 1 + var byte_length = byteto - bytefrom + 1 + var r_items = new NativeString(byte_length) + items.copy_to(r_items, byte_length, bytefrom, 0) + return new FlatBuffer.with_infos(r_items, byte_length, byte_length) else return new Buffer end @@ -618,22 +704,15 @@ class FlatBuffer redef fun reverse do written = false - var ns = new NativeString(capacity) - var si = length - 1 - var ni = 0 - var it = items - while si >= 0 do - ns[ni] = it[si] - ni += 1 - si -= 1 - end - items = ns + var ns = new FlatBuffer.with_capacity(capacity) + for i in chars.reverse_iterator do ns.add i + items = ns.items end redef fun times(repeats) do - var x = new FlatString.with_infos(items, length, 0, length - 1) - for i in [1..repeats[ do + var x = new FlatString.with_infos(items, bytelen, 0, bytelen - 1) + for i in [1 .. repeats[ do append(x) end end @@ -641,21 +720,13 @@ class FlatBuffer redef fun upper do if written then reset - var id = length - 1 - while id >= 0 do - self[id] = self[id].to_upper - id -= 1 - end + for i in [0 .. length[ do self[i] = self[i].to_upper end redef fun lower do if written then reset - var id = length - 1 - while id >= 0 do - self[id] = self[id].to_lower - id -= 1 - end + for i in [0 .. length[ do self[i] = self[i].to_lower end end @@ -745,7 +816,7 @@ private class FlatBufferByteIterator redef fun index do return curr_pos - redef fun is_ok do return curr_pos < target.length + redef fun is_ok do return curr_pos < target.bytelen redef fun item do return target_items[curr_pos] @@ -925,7 +996,7 @@ redef class Int var ns = new NativeString(nslen + 1) ns[nslen] = 0u8 native_int_to_s(ns, nslen + 1) - return ns.to_s_with_length(nslen) + return new FlatString.full(ns, nslen, 0, nslen - 1, nslen) end end @@ -949,7 +1020,7 @@ redef class Array[E] continue end var tmp = itsi.to_s - sl += tmp.length + sl += tmp.bytelen na[mypos] = tmp i += 1 mypos += 1 @@ -960,15 +1031,15 @@ redef class Array[E] var off = 0 while i < mypos do var tmp = na[i] - var tpl = tmp.length if tmp isa FlatString then - tmp.items.copy_to(ns, tpl, tmp.index_from, off) + var tpl = tmp.bytelen + tmp.items.copy_to(ns, tpl, tmp.first_byte, off) off += tpl else for j in tmp.substrings do var s = j.as(FlatString) - var slen = s.length - s.items.copy_to(ns, slen, s.index_from, off) + var slen = s.bytelen + s.items.copy_to(ns, slen, s.first_byte, off) off += slen end end @@ -987,7 +1058,7 @@ redef class NativeArray[E] var sl = 0 var mypos = 0 while i < l do - sl += na[i].length + sl += na[i].bytelen i += 1 mypos += 1 end @@ -997,15 +1068,15 @@ redef class NativeArray[E] var off = 0 while i < mypos do var tmp = na[i] - var tpl = tmp.length if tmp isa FlatString then - tmp.items.copy_to(ns, tpl, tmp.index_from, off) + var tpl = tmp.bytelen + tmp.items.copy_to(ns, tpl, tmp.first_byte, off) off += tpl else for j in tmp.substrings do var s = j.as(FlatString) - var slen = s.length - s.items.copy_to(ns, slen, s.index_from, off) + var slen = s.bytelen + s.items.copy_to(ns, slen, s.first_byte, off) off += slen end end diff --git a/lib/standard/text/ropes.nit b/lib/standard/text/ropes.nit index 5d5878b..018217e 100644 --- a/lib/standard/text/ropes.nit +++ b/lib/standard/text/ropes.nit @@ -76,18 +76,20 @@ private class Concat redef var length is noinit + redef var bytelen is noinit + redef fun substrings do return new RopeSubstrings(self) redef fun empty do return "" redef var to_cstring is lazy do - var len = length + var len = bytelen var ns = new NativeString(len + 1) ns[len] = 0u8 var off = 0 for i in substrings do - var ilen = i.length - i.as(FlatString).items.copy_to(ns, ilen, i.as(FlatString).index_from, off) + var ilen = i.bytelen + i.as(FlatString).items.copy_to(ns, ilen, i.as(FlatString).first_byte, off) off += ilen end return ns @@ -100,6 +102,7 @@ private class Concat init do length = left.length + right.length + bytelen = left.bytelen + right.bytelen end redef fun output do @@ -147,12 +150,12 @@ private class Concat redef fun +(o) do var s = o.to_s - var slen = s.length + var slen = s.bytelen if s isa Concat then return new Concat(self, s) else var r = right - var rlen = r.length + var rlen = r.bytelen if rlen + slen > maxlen then return new Concat(self, s) return new Concat(left, r + s) end @@ -202,7 +205,7 @@ class RopeBuffer redef var bytes: Sequence[Byte] is lazy do return new RopeBufferBytes(self) # The final string being built on the fly - private var str: String is noinit + private var str: String = "" # Current concatenation buffer private var ns: NativeString is noinit @@ -217,10 +220,21 @@ class RopeBuffer # a long string (length > maxlen) is appended. private var dumped: Int is noinit - # Length of the complete rope - redef var length = 0 + # Length of the complete rope in chars (0) + redef fun length do + var st = dumped + var len = str.length + while st < rpos do + st += ns[st].u8len + len += 1 + end + return len + end + + # Length of the complete rope in bytes + redef var bytelen = 0 - # Length of the mutable part + # Length of the mutable part (in bytes) # # Is also used as base to compute the size of the next # mutable native string (`ns`) @@ -230,7 +244,6 @@ class RopeBuffer # Builds an empty `RopeBuffer` init do - str = "" ns = new NativeString(maxlen) buf_size = maxlen dumped = 0 @@ -241,7 +254,7 @@ class RopeBuffer self.str = str ns = new NativeString(maxlen) buf_size = maxlen - length = str.length + bytelen = str.length dumped = 0 end @@ -306,7 +319,7 @@ class RopeBuffer redef fun clear do str = "" - length = 0 + bytelen = 0 rpos = 0 dumped = 0 if written then @@ -347,63 +360,29 @@ class RopeBuffer end redef fun append(s) do - var slen = s.length - length += slen - var rp = rpos - if s isa Rope or slen > maxlen then - if rp > 0 and dumped != rp then - str += new FlatString.with_infos(ns, rp - dumped, dumped, rp - 1) - dumped = rp - end - str = str + s + var slen = s.bytelen + if slen >= maxlen then + persist_buffer + str += s.to_s return end - var remsp = buf_size - rp - var sits: NativeString - var begin: Int - if s isa FlatString then - begin = s.index_from - sits = s.items - else if s isa FlatBuffer then - begin = 0 - sits = s.items - else + if s isa FlatText then + var oits = s.items + var from = if s isa FlatString then s.first_byte else 0 + var remsp = buf_size - rpos if slen <= remsp then - for i in s.bytes do - ns[rpos] = i - rpos += 1 - end - else - var spos = 0 - for i in [0..remsp[ do - ns[rpos] = s.bytes[spos] - rpos += 1 - spos += 1 - end - dump_buffer - while spos < slen do - ns[rpos] = s.bytes[spos] - spos += 1 - rpos += 1 - end - end - return - end - if slen <= remsp then - if remsp <= 0 then - dump_buffer - rpos = 0 - else - sits.copy_to(ns, slen, begin, rp) + oits.copy_to(ns, slen, from, rpos) rpos += slen + return end - else - sits.copy_to(ns, remsp, begin, rp) - rpos = buf_size + var brk = oits.find_beginning_of_char_at(from + remsp) + oits.copy_to(ns, brk, from, rpos) + rpos += brk dump_buffer - var nlen = slen - remsp - sits.copy_to(ns, nlen, begin + remsp, 0) - rpos = nlen + oits.copy_to(ns, slen - remsp, brk, 0) + rpos = slen - remsp + else + for i in s.substrings do append i end end @@ -416,7 +395,7 @@ class RopeBuffer # TODO: Fix when supporting UTF-8 ns[rp] = c.ascii.to_b rp += 1 - length += 1 + bytelen += 1 rpos = rp end @@ -428,7 +407,7 @@ class RopeBuffer end ns[rp] = b rp += 1 - length += 1 + bytelen += 1 rpos = rp end @@ -443,10 +422,12 @@ class RopeBuffer ns = new NativeString(bs) buf_size = bs dumped = 0 + rpos = 0 end # Similar to dump_buffer, but does not reallocate a new NativeString private fun persist_buffer do + if rpos == dumped then return var nstr = new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1) str += nstr dumped = rpos @@ -468,10 +449,8 @@ class RopeBuffer redef fun enlarge(i) do end redef fun to_s do - written = true - var nnslen = rpos - dumped - if nnslen == 0 then return str - return str + new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1) + dump_buffer + return str end redef fun reverse do @@ -506,16 +485,16 @@ redef class FlatString redef fun +(o) do var s = o.to_s - var slen = s.length - var mlen = length + var slen = s.bytelen + var mlen = bytelen if slen == 0 then return self if mlen == 0 then return s var nlen = slen + mlen if s isa FlatString then if nlen > maxlen then return new Concat(self, s) var mits = items - var sifrom = s.index_from - var mifrom = index_from + var sifrom = s.first_byte + var mifrom = first_byte var sits = s.items var ns = new NativeString(nlen + 1) mits.copy_to(ns, mlen, mifrom, 0) @@ -523,7 +502,7 @@ redef class FlatString return ns.to_s_with_length(nlen) else if s isa Concat then var sl = s.left - var sllen = sl.length + var sllen = sl.bytelen if sllen + mlen > maxlen then return new Concat(self, s) return new Concat(self + sl, s.right) else @@ -559,11 +538,11 @@ private class RopeByteReverseIterator var subs: IndexedIterator[FlatString] init(root: Concat) is old_style_init do - pos = root.length - 1 + pos = root.bytelen - 1 subs = new ReverseRopeSubstrings(root) var s = subs.item ns = s.items - pns = s.index_to + pns = s.last_byte end init from(root: Concat, pos: Int) do @@ -589,7 +568,7 @@ private class RopeByteReverseIterator if not subs.is_ok then return var s = subs.item ns = s.items - pns = s.index_to + pns = s.last_byte end end @@ -633,7 +612,7 @@ private class RopeByteIterator redef fun next do pns += 1 pos += 1 - if pns < subs.item.length then return + if pns < subs.item.bytelen then return if not subs.is_ok then return subs.next if not subs.is_ok then return @@ -975,7 +954,6 @@ private class RopeBytes redef type SELFTYPE: Concat redef fun [](i) do - var b: Int var nod: String = target loop if nod isa FlatString then return nod.items[i] @@ -1096,7 +1074,7 @@ class RopeBufferByteIterator # Init the iterator from a RopeBuffer. init(t: RopeBuffer) is old_style_init do ns = t.ns - maxpos = t.rpos + maxpos = t.bytelen sit = t.str.bytes.iterator pns = t.dumped index = 0 @@ -1105,7 +1083,7 @@ class RopeBufferByteIterator # Init the iterator from a RopeBuffer starting from `pos`. init from(t: RopeBuffer, pos: Int) do ns = t.ns - maxpos = t.length + maxpos = t.bytelen sit = t.str.bytes.iterator_from(pos) pns = pos - t.str.length index = pos @@ -1147,19 +1125,19 @@ class RopeBufferByteReverseIterator init(tgt: RopeBuffer) is old_style_init do sit = tgt.str.bytes.reverse_iterator pns = tgt.rpos - 1 - index = tgt.length - 1 + index = tgt.bytelen - 1 ns = tgt.ns end # Init the iterator from a RopeBuffer starting from `pos`. init from(tgt: RopeBuffer, pos: Int) do - sit = tgt.str.bytes.reverse_iterator_from(pos - tgt.rpos - tgt.dumped) - pns = pos - tgt.str.length + sit = tgt.str.bytes.reverse_iterator_from(pos - (tgt.rpos - tgt.dumped)) + pns = pos - tgt.str.bytelen + tgt.rpos index = pos ns = tgt.ns end - redef fun is_ok do return index > 0 + redef fun is_ok do return index >= 0 redef fun item do if pns >= 0 then return ns[pns] @@ -1168,7 +1146,7 @@ class RopeBufferByteReverseIterator redef fun next do index -= 1 - if pns >= 0 then + if pns > 0 then pns -= 1 else sit.next @@ -1186,7 +1164,7 @@ class RopeBufferBytes if i < target.str.bytelen then return target.str.bytes[i] else - return target.ns[i - target.str.length] + return target.ns[i - target.str.bytelen] end end diff --git a/lib/websocket/websocket.nit b/lib/websocket/websocket.nit index d384512..441df14 100644 --- a/lib/websocket/websocket.nit +++ b/lib/websocket/websocket.nit @@ -137,10 +137,10 @@ class WebsocketConnection ans_buffer.add(msg.length.to_b) end if msg isa FlatString then - ans_buffer.append_ns_from(msg.items, msg.length, msg.index_from) + ans_buffer.append_ns_from(msg.items, msg.length, msg.first_byte) else for i in msg.substrings do - ans_buffer.append_ns_from(i.as(FlatString).items, i.length, i.as(FlatString).index_from) + ans_buffer.append_ns_from(i.as(FlatString).items, i.length, i.as(FlatString).first_byte) end end return ans_buffer -- 1.7.9.5