This PR is essentially the same as #1632, which has been invalidated by some black magic.
This PR heavily improves the performance on FlatBuffer by introducing cache mechanisms on indexed access.
This is possible through removal of byte-level manipulation on Buffers and re-introduction of a constant-time length attribute on Buffer.
nitmd is now practicable again and should no longer take too much time when running benches.
This PR still depends on #1628, and as such will not pass no tests as long as it is not integrated
Pull-Request: #1644
# Returns a truncated string when an error is pending (`last_error != null`).
fun read_string: String
do
- var buf = new FlatBuffer
+ var buf = new Bytes.empty
loop
var byte = read_byte
- if byte == null or byte == 0x00u8 then return buf.to_s
- buf.bytes.add byte
+ if byte == null or byte == 0u8 then
+ return buf.to_s
+ end
+ buf.add byte
end
end
# In Buffers, the internal sequence of character is mutable
# Thus, `chars` can be used to modify the buffer.
redef fun chars: Sequence[Char] is abstract
-
- # In Buffers, the internal sequence of bytes is mutable
- # Thus, `bytes` can be used to modify the buffer.
- redef fun bytes: Sequence[Byte] is abstract
end
# View for chars on Buffer objects, extends Sequence
# for mutation operations
private abstract class BufferByteView
super StringByteView
- super Sequence[Byte]
redef type SELFTYPE: Buffer
end
redef fun next do tgt = null
end
-# Immutable strings of characters.
-class FlatString
- super FlatText
- super String
-
- # Index at which `self` begins in `items`, inclusively
- private var first_byte: Int is noinit
+redef class FlatText
- # Index at which `self` ends in `items`, inclusively
- private var last_byte: Int is noinit
-
- redef var chars = new FlatStringCharView(self) is lazy
+ private fun first_byte: Int do return 0
- redef var bytes = new FlatStringByteView(self) is lazy
+ private fun last_byte: Int do return bytelen - 1
# Cache of the latest position (char) explored in the string
var position: Int = 0
+
# Cached position (bytes) in the NativeString underlying the String
var bytepos: Int = first_byte is lateinit
- redef var length is lazy do
- if bytelen == 0 then return 0
- var st = first_byte
- var its = items
- var ln = 0
- var lst = last_byte
- while st <= lst do
- st += its.length_of_char_at(st)
- ln += 1
- end
- return ln
- end
-
- redef fun [](index) do return items.char_at(char_to_byte_index(index))
-
# Index of the character `index` in `items`
private fun char_to_byte_index(index: Int): Int do
var ln = length
return ns_i
end
+ redef fun [](index) do return items.char_at(char_to_byte_index(index))
+end
+
+# Immutable strings of characters.
+class FlatString
+ super FlatText
+ super String
+
+ # Index at which `self` begins in `items`, inclusively
+ redef var first_byte is noinit
+
+ # Index at which `self` ends in `items`, inclusively
+ redef var last_byte is noinit
+
+ redef var chars = new FlatStringCharView(self) is lazy
+
+ redef var bytes = new FlatStringByteView(self) is lazy
+
+ redef var length is lazy do
+ if bytelen == 0 then return 0
+ var st = first_byte
+ var its = items
+ var ln = 0
+ var lst = last_byte
+ while st <= lst do
+ st += its.length_of_char_at(st)
+ ln += 1
+ end
+ return ln
+ end
+
redef fun reversed
do
var b = new FlatBuffer.with_capacity(bytelen + 1)
var mifrom = first_byte
if s isa FlatText then
var sits = s.items
- var sifrom = s.as(FlatString).first_byte
+ var sifrom = s.first_byte
var ns = new NativeString(nlen + 1)
mits.copy_to(ns, mlen, mifrom, 0)
sits.copy_to(ns, slen, sifrom, mlen)
redef var chars: Sequence[Char] = new FlatBufferCharView(self) is lazy
- redef var bytes: Sequence[Byte] = new FlatBufferByteView(self) is lazy
+ redef var bytes = new FlatBufferByteView(self) is lazy
redef var bytelen = 0
- # O(n)
- redef fun length do
- var max = bytelen
- if max == 0 then return 0
- var pos = 0
- var ln = 0
- var its = items
- while pos < max do
- pos += its.length_of_char_at(pos)
- ln += 1
- end
- return ln
- end
+ redef var length = 0
+
+ private var char_cache: Int = -1
+
+ private var byte_cache: Int = -1
private var capacity = 0
items.copy_to(items, bytelen - from, from, from - len)
end
- redef fun [](i)
- do
- assert i < length and i >= 0
- return items.char_at(items.char_to_byte_index(i))
- end
-
redef fun []=(index, item)
do
assert index >= 0 and index <= length
lshift_bytes(ip + clen, -size_diff)
end
bytelen += size_diff
+ bytepos += size_diff
items.set_char_at(ip, item)
end
enlarge(bytelen + clen)
items.set_char_at(bytelen, c)
bytelen += clen
- end
-
- private fun add_byte(b: Byte) do
- if written then reset
- is_dirty = true
- enlarge(bytelen + 1)
- items[bytelen] = b
- # FIXME: Might trigger errors
- bytelen += 1
+ length += 1
end
redef fun clear do
is_dirty = true
if written then reset
bytelen = 0
+ length = 0
end
redef fun empty do return new Buffer
#
# If `items` is shared, `written` should be set to true after the creation
# so that a modification will do a copy-on-write.
- private init with_infos(items: NativeString, capacity, bytelen: Int)
+ private init with_infos(items: NativeString, capacity, bytelen, length: Int)
do
self.items = items
self.capacity = capacity
self.bytelen = bytelen
+ self.length = length
end
# Create a new string copied from `s`.
for i in substrings do i.as(FlatString).items.copy_to(items, i.bytelen, 0, 0)
end
bytelen = s.bytelen
+ length = s.length
capacity = s.bytelen
written = true
end
is_dirty = true
var sl = s.bytelen
enlarge(bytelen + sl)
- if s isa FlatString then
+ if s isa FlatText then
s.items.copy_to(items, sl, s.first_byte, bytelen)
- else if s isa FlatBuffer then
- s.items.copy_to(items, sl, 0, bytelen)
else
for i in s.substrings do append i
return
end
bytelen += sl
+ length += s.length
end
# Copies the content of self in `dest`
var byte_length = byteto - bytefrom + 1
var r_items = new NativeString(byte_length)
items.copy_to(r_items, byte_length, bytefrom, 0)
- return new FlatBuffer.with_infos(r_items, byte_length, byte_length)
+ return new FlatBuffer.with_infos(r_items, byte_length, byte_length, count)
else
return new Buffer
end
redef fun [](index) do return target.items[index]
- redef fun []=(index, item)
- do
- assert index >= 0 and index <= target.bytelen
- if index == target.bytelen then
- add(item)
- return
- end
- target.items[index] = item
- end
-
- redef fun push(c)
- do
- target.add_byte(c)
- end
-
- fun enlarge(cap: Int)
- do
- target.enlarge(cap)
- end
-
- redef fun append(s)
- do
- var s_length = s.length
- if target.capacity < (target.length + s_length) then enlarge(s_length + target.length)
- var pos = target.length
- var its = target.items
- for i in s do
- its[pos] = i
- pos += 1
- end
- target.length += s.length
- end
-
redef fun iterator_from(pos) do return new FlatBufferByteIterator.with_pos(target, pos)
redef fun reverse_iterator_from(pos) do return new FlatBufferByteReverseIterator.with_pos(target, pos)
redef var chars: Sequence[Char] is lazy do return new RopeBufferChars(self)
- redef var bytes: Sequence[Byte] is lazy do return new RopeBufferBytes(self)
+ redef var bytes is lazy do return new RopeBufferBytes(self)
# The final string being built on the fly
private var str: String = ""
# Next available (e.g. unset) character in the `Buffer`
private var rpos = 0
+ # Length (in chars) of the buffered part
+ private var nslen = 0
+
# Keeps track of the buffer's currently dumped part
#
# This might happen if for instance, a String was being
end
if s isa FlatText then
var oits = s.items
- var from = if s isa FlatString then s.first_byte else 0
+ var from = s.first_byte
var remsp = buf_size - rpos
if slen <= remsp then
oits.copy_to(ns, slen, from, rpos)
rpos = rp
end
- private fun add_byte(b: Byte) do
- var rp = rpos
- if rp >= buf_size then
- dump_buffer
- rp = 0
- end
- ns[rp] = b
- rp += 1
- bytelen += 1
- rpos = rp
- end
-
# Converts the Buffer to a FlatString, appends it to
# the final String and re-allocates a new larger Buffer.
private fun dump_buffer do
end
end
- redef fun []=(i,c) do
- if i == target.length then target.add_byte c
- if i < target.str.length then
- # FIXME: Will need to be optimized and rewritten with Unicode
- var s = target.str
- var l = s.substring(0, i)
- var r = s.substring_from(i + 1)
- target.str = l + c.to_i.ascii.to_s + r
- else
- target.ns[i - target.str.length] = c
- end
- end
-
- redef fun add(c) do target.add_byte c
-
- redef fun push(c) do target.add_byte c
-
redef fun iterator_from(i) do return new RopeBufferByteIterator.from(target, i)
redef fun reverse_iterator_from(i) do return new RopeBufferByteReverseIterator.from(target, i)
[0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54]
[0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54]
[0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54]
-This string is cool
-This string is coolA
-Ahis string is coolA
-This string is cool
-This string is coolA
-Ahis string is coolA
+[0x54,0x68,0x69,0x73,0x20,0x73,0x74,0x72,0x69,0x6e,0x67,0x20,0x69,0x73,0x20,0x63,0x6f,0x6f,0x6c]
+[0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54]
+[0x54,0x68,0x69,0x73,0x20,0x73,0x74,0x72,0x69,0x6e,0x67,0x20,0x69,0x73,0x20,0x63,0x6f,0x6f,0x6c]
+[0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54]
var b = new FlatBuffer.from(x)
-print b
-
-b.bytes.add 0x41u8
-
-print b
-
-b.bytes[0] = 0x41u8
-
-print b
+print b.bytes.to_a
+print b.bytes.reverse_iterator.to_a
var c = new RopeBuffer.from(x)
-print c
-
-c.bytes.add 0x41u8
-
-print c
-
-c.bytes[0] = 0x41u8
-
-print c
+print c.bytes
+print c.bytes.reverse_iterator.to_a