Merge: Performance update on Buffer
authorJean Privat <jean@pryen.org>
Thu, 20 Aug 2015 15:04:44 +0000 (11:04 -0400)
committerJean Privat <jean@pryen.org>
Thu, 20 Aug 2015 15:04:44 +0000 (11:04 -0400)
This PR is essentially the same as #1632, which has been invalidated by some black magic.

This PR heavily improves the performance on FlatBuffer by introducing cache mechanisms on indexed access.

This is possible through removal of byte-level manipulation on Buffers and re-introduction of a constant-time length attribute on Buffer.

nitmd is now practicable again and should no longer take too much time when running benches.

This PR still depends on #1628, and as such will not pass no tests as long as it is not integrated

Pull-Request: #1644

lib/binary/binary.nit
lib/standard/text/abstract_text.nit
lib/standard/text/flat.nit
lib/standard/text/ropes.nit
tests/sav/test_string_bytes.res
tests/test_string_bytes.nit

index 263ad95..7e5c180 100644 (file)
@@ -178,11 +178,13 @@ redef abstract class Reader
        # Returns a truncated string when an error is pending (`last_error != null`).
        fun read_string: String
        do
-               var buf = new FlatBuffer
+               var buf = new Bytes.empty
                loop
                        var byte = read_byte
-                       if byte == null or byte == 0x00u8 then return buf.to_s
-                       buf.bytes.add byte
+                       if byte == null or byte == 0u8 then
+                               return buf.to_s
+                       end
+                       buf.add byte
                end
        end
 
index 8de0af9..1faaad3 100644 (file)
@@ -1353,10 +1353,6 @@ abstract class Buffer
        # In Buffers, the internal sequence of character is mutable
        # Thus, `chars` can be used to modify the buffer.
        redef fun chars: Sequence[Char] is abstract
-
-       # In Buffers, the internal sequence of bytes is mutable
-       # Thus, `bytes` can be used to modify the buffer.
-       redef fun bytes: Sequence[Byte] is abstract
 end
 
 # View for chars on Buffer objects, extends Sequence
@@ -1373,7 +1369,6 @@ end
 # for mutation operations
 private abstract class BufferByteView
        super StringByteView
-       super Sequence[Byte]
 
        redef type SELFTYPE: Buffer
 end
index 87381e6..917b0e5 100644 (file)
@@ -34,41 +34,18 @@ private class FlatSubstringsIter
        redef fun next do tgt = null
 end
 
-# Immutable strings of characters.
-class FlatString
-       super FlatText
-       super String
-
-       # Index at which `self` begins in `items`, inclusively
-       private var first_byte: Int is noinit
+redef class FlatText
 
-       # Index at which `self` ends in `items`, inclusively
-       private var last_byte: Int is noinit
-
-       redef var chars = new FlatStringCharView(self) is lazy
+       private fun first_byte: Int do return 0
 
-       redef var bytes = new FlatStringByteView(self) is lazy
+       private fun last_byte: Int do return bytelen - 1
 
        # Cache of the latest position (char) explored in the string
        var position: Int = 0
+
        # Cached position (bytes) in the NativeString underlying the String
        var bytepos: Int = first_byte is lateinit
 
-       redef var length is lazy do
-               if bytelen == 0 then return 0
-               var st = first_byte
-               var its = items
-               var ln = 0
-               var lst = last_byte
-               while st <= lst do
-                       st += its.length_of_char_at(st)
-                       ln += 1
-               end
-               return ln
-       end
-
-       redef fun [](index) do return items.char_at(char_to_byte_index(index))
-
        # Index of the character `index` in `items`
        private fun char_to_byte_index(index: Int): Int do
                var ln = length
@@ -107,6 +84,37 @@ class FlatString
                return ns_i
        end
 
+       redef fun [](index) do return items.char_at(char_to_byte_index(index))
+end
+
+# Immutable strings of characters.
+class FlatString
+       super FlatText
+       super String
+
+       # Index at which `self` begins in `items`, inclusively
+       redef var first_byte is noinit
+
+       # Index at which `self` ends in `items`, inclusively
+       redef var last_byte is noinit
+
+       redef var chars = new FlatStringCharView(self) is lazy
+
+       redef var bytes = new FlatStringByteView(self) is lazy
+
+       redef var length is lazy do
+               if bytelen == 0 then return 0
+               var st = first_byte
+               var its = items
+               var ln = 0
+               var lst = last_byte
+               while st <= lst do
+                       st += its.length_of_char_at(st)
+                       ln += 1
+               end
+               return ln
+       end
+
        redef fun reversed
        do
                var b = new FlatBuffer.with_capacity(bytelen + 1)
@@ -280,7 +288,7 @@ class FlatString
                var mifrom = first_byte
                if s isa FlatText then
                        var sits = s.items
-                       var sifrom = s.as(FlatString).first_byte
+                       var sifrom = s.first_byte
                        var ns = new NativeString(nlen + 1)
                        mits.copy_to(ns, mlen, mifrom, 0)
                        sits.copy_to(ns, slen, sifrom, mlen)
@@ -471,23 +479,15 @@ class FlatBuffer
 
        redef var chars: Sequence[Char] = new FlatBufferCharView(self) is lazy
 
-       redef var bytes: Sequence[Byte] = new FlatBufferByteView(self) is lazy
+       redef var bytes = new FlatBufferByteView(self) is lazy
 
        redef var bytelen = 0
 
-       # O(n)
-       redef fun length do
-               var max = bytelen
-               if max == 0 then return 0
-               var pos = 0
-               var ln = 0
-               var its = items
-               while pos < max do
-                       pos += its.length_of_char_at(pos)
-                       ln += 1
-               end
-               return ln
-       end
+       redef var length = 0
+
+       private var char_cache: Int = -1
+
+       private var byte_cache: Int = -1
 
        private var capacity = 0
 
@@ -527,12 +527,6 @@ class FlatBuffer
                items.copy_to(items, bytelen - from, from, from - len)
        end
 
-       redef fun [](i)
-       do
-               assert i < length and i >= 0
-               return items.char_at(items.char_to_byte_index(i))
-       end
-
        redef fun []=(index, item)
        do
                assert index >= 0 and index <= length
@@ -553,6 +547,7 @@ class FlatBuffer
                        lshift_bytes(ip + clen, -size_diff)
                end
                bytelen += size_diff
+               bytepos += size_diff
                items.set_char_at(ip, item)
        end
 
@@ -564,21 +559,14 @@ class FlatBuffer
                enlarge(bytelen + clen)
                items.set_char_at(bytelen, c)
                bytelen += clen
-       end
-
-       private fun add_byte(b: Byte) do
-               if written then reset
-               is_dirty = true
-               enlarge(bytelen + 1)
-               items[bytelen] = b
-               # FIXME: Might trigger errors
-               bytelen += 1
+               length += 1
        end
 
        redef fun clear do
                is_dirty = true
                if written then reset
                bytelen = 0
+               length = 0
        end
 
        redef fun empty do return new Buffer
@@ -626,11 +614,12 @@ class FlatBuffer
        #
        # If `items` is shared, `written` should be set to true after the creation
        # so that a modification will do a copy-on-write.
-       private init with_infos(items: NativeString, capacity, bytelen: Int)
+       private init with_infos(items: NativeString, capacity, bytelen, length: Int)
        do
                self.items = items
                self.capacity = capacity
                self.bytelen = bytelen
+               self.length = length
        end
 
        # Create a new string copied from `s`.
@@ -643,6 +632,7 @@ class FlatBuffer
                        for i in substrings do i.as(FlatString).items.copy_to(items, i.bytelen, 0, 0)
                end
                bytelen = s.bytelen
+               length = s.length
                capacity = s.bytelen
                written = true
        end
@@ -662,15 +652,14 @@ class FlatBuffer
                is_dirty = true
                var sl = s.bytelen
                enlarge(bytelen + sl)
-               if s isa FlatString then
+               if s isa FlatText then
                        s.items.copy_to(items, sl, s.first_byte, bytelen)
-               else if s isa FlatBuffer then
-                       s.items.copy_to(items, sl, 0, bytelen)
                else
                        for i in s.substrings do append i
                        return
                end
                bytelen += sl
+               length += s.length
        end
 
        # Copies the content of self in `dest`
@@ -695,7 +684,7 @@ class FlatBuffer
                        var byte_length = byteto - bytefrom + 1
                        var r_items = new NativeString(byte_length)
                        items.copy_to(r_items, byte_length, bytefrom, 0)
-                       return new FlatBuffer.with_infos(r_items, byte_length, byte_length)
+                       return new FlatBuffer.with_infos(r_items, byte_length, byte_length, count)
                else
                        return new Buffer
                end
@@ -761,39 +750,6 @@ private class FlatBufferByteView
 
        redef fun [](index) do return target.items[index]
 
-       redef fun []=(index, item)
-       do
-               assert index >= 0 and index <= target.bytelen
-               if index == target.bytelen then
-                       add(item)
-                       return
-               end
-               target.items[index] = item
-       end
-
-       redef fun push(c)
-       do
-               target.add_byte(c)
-       end
-
-       fun enlarge(cap: Int)
-       do
-               target.enlarge(cap)
-       end
-
-       redef fun append(s)
-       do
-               var s_length = s.length
-               if target.capacity < (target.length + s_length) then enlarge(s_length + target.length)
-               var pos = target.length
-               var its = target.items
-               for i in s do
-                       its[pos] = i
-                       pos += 1
-               end
-               target.length += s.length
-       end
-
        redef fun iterator_from(pos) do return new FlatBufferByteIterator.with_pos(target, pos)
 
        redef fun reverse_iterator_from(pos) do return new FlatBufferByteReverseIterator.with_pos(target, pos)
index 08611b7..2b3ff28 100644 (file)
@@ -270,7 +270,7 @@ class RopeBuffer
 
        redef var chars: Sequence[Char] is lazy do return new RopeBufferChars(self)
 
-       redef var bytes: Sequence[Byte] is lazy do return new RopeBufferBytes(self)
+       redef var bytes is lazy do return new RopeBufferBytes(self)
 
        # The final string being built on the fly
        private var str: String = ""
@@ -281,6 +281,9 @@ class RopeBuffer
        # Next available (e.g. unset) character in the `Buffer`
        private var rpos = 0
 
+       # Length (in chars) of the buffered part
+       private var nslen = 0
+
        # Keeps track of the buffer's currently dumped part
        #
        # This might happen if for instance, a String was being
@@ -436,7 +439,7 @@ class RopeBuffer
                end
                if s isa FlatText then
                        var oits = s.items
-                       var from = if s isa FlatString then s.first_byte else 0
+                       var from = s.first_byte
                        var remsp = buf_size - rpos
                        if slen <= remsp then
                                oits.copy_to(ns, slen, from, rpos)
@@ -467,18 +470,6 @@ class RopeBuffer
                rpos = rp
        end
 
-       private fun add_byte(b: Byte) do
-               var rp = rpos
-               if rp >= buf_size then
-                       dump_buffer
-                       rp = 0
-               end
-               ns[rp] = b
-               rp += 1
-               bytelen += 1
-               rpos = rp
-       end
-
        # Converts the Buffer to a FlatString, appends it to
        # the final String and re-allocates a new larger Buffer.
        private fun dump_buffer do
@@ -1237,23 +1228,6 @@ class RopeBufferBytes
                end
        end
 
-       redef fun []=(i,c) do
-               if i == target.length then target.add_byte c
-               if i < target.str.length then
-                       # FIXME: Will need to be optimized and rewritten with Unicode
-                       var s = target.str
-                       var l = s.substring(0, i)
-                       var r = s.substring_from(i + 1)
-                       target.str = l + c.to_i.ascii.to_s + r
-               else
-                       target.ns[i - target.str.length] = c
-               end
-       end
-
-       redef fun add(c) do target.add_byte c
-
-       redef fun push(c) do target.add_byte c
-
        redef fun iterator_from(i) do return new RopeBufferByteIterator.from(target, i)
 
        redef fun reverse_iterator_from(i) do return new RopeBufferByteReverseIterator.from(target, i)
index 8d98756..2984735 100644 (file)
@@ -4,9 +4,7 @@
 [0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54]
 [0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54]
 [0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54]
-This string is cool
-This string is coolA
-Ahis string is coolA
-This string is cool
-This string is coolA
-Ahis string is coolA
+[0x54,0x68,0x69,0x73,0x20,0x73,0x74,0x72,0x69,0x6e,0x67,0x20,0x69,0x73,0x20,0x63,0x6f,0x6f,0x6c]
+[0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54]
+[0x54,0x68,0x69,0x73,0x20,0x73,0x74,0x72,0x69,0x6e,0x67,0x20,0x69,0x73,0x20,0x63,0x6f,0x6f,0x6c]
+[0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54]
index 875f5dc..13048d8 100644 (file)
@@ -28,24 +28,10 @@ print z.bytes.reverse_iterator.to_a
 
 var b = new FlatBuffer.from(x)
 
-print b
-
-b.bytes.add 0x41u8
-
-print b
-
-b.bytes[0] = 0x41u8
-
-print b
+print b.bytes.to_a
+print b.bytes.reverse_iterator.to_a
 
 var c = new RopeBuffer.from(x)
 
-print c
-
-c.bytes.add 0x41u8
-
-print c
-
-c.bytes[0] = 0x41u8
-
-print c
+print c.bytes
+print c.bytes.reverse_iterator.to_a