From: Jean Privat Date: Thu, 20 Aug 2015 15:04:44 +0000 (-0400) Subject: Merge: Performance update on Buffer X-Git-Tag: v0.7.8~79 X-Git-Url: http://nitlanguage.org?hp=a672b1d158eeb2cbf1dfb5316f6f37c558971832 Merge: Performance update on Buffer This PR is essentially the same as #1632, which has been invalidated by some black magic. This PR heavily improves the performance on FlatBuffer by introducing cache mechanisms on indexed access. This is possible through removal of byte-level manipulation on Buffers and re-introduction of a constant-time length attribute on Buffer. nitmd is now practicable again and should no longer take too much time when running benches. This PR still depends on #1628, and as such will not pass no tests as long as it is not integrated Pull-Request: #1644 --- diff --git a/lib/binary/binary.nit b/lib/binary/binary.nit index 263ad95..7e5c180 100644 --- a/lib/binary/binary.nit +++ b/lib/binary/binary.nit @@ -178,11 +178,13 @@ redef abstract class Reader # Returns a truncated string when an error is pending (`last_error != null`). fun read_string: String do - var buf = new FlatBuffer + var buf = new Bytes.empty loop var byte = read_byte - if byte == null or byte == 0x00u8 then return buf.to_s - buf.bytes.add byte + if byte == null or byte == 0u8 then + return buf.to_s + end + buf.add byte end end diff --git a/lib/standard/text/abstract_text.nit b/lib/standard/text/abstract_text.nit index 8de0af9..1faaad3 100644 --- a/lib/standard/text/abstract_text.nit +++ b/lib/standard/text/abstract_text.nit @@ -1353,10 +1353,6 @@ abstract class Buffer # In Buffers, the internal sequence of character is mutable # Thus, `chars` can be used to modify the buffer. redef fun chars: Sequence[Char] is abstract - - # In Buffers, the internal sequence of bytes is mutable - # Thus, `bytes` can be used to modify the buffer. - redef fun bytes: Sequence[Byte] is abstract end # View for chars on Buffer objects, extends Sequence @@ -1373,7 +1369,6 @@ end # for mutation operations private abstract class BufferByteView super StringByteView - super Sequence[Byte] redef type SELFTYPE: Buffer end diff --git a/lib/standard/text/flat.nit b/lib/standard/text/flat.nit index 87381e6..917b0e5 100644 --- a/lib/standard/text/flat.nit +++ b/lib/standard/text/flat.nit @@ -34,41 +34,18 @@ private class FlatSubstringsIter redef fun next do tgt = null end -# Immutable strings of characters. -class FlatString - super FlatText - super String - - # Index at which `self` begins in `items`, inclusively - private var first_byte: Int is noinit +redef class FlatText - # Index at which `self` ends in `items`, inclusively - private var last_byte: Int is noinit - - redef var chars = new FlatStringCharView(self) is lazy + private fun first_byte: Int do return 0 - redef var bytes = new FlatStringByteView(self) is lazy + private fun last_byte: Int do return bytelen - 1 # Cache of the latest position (char) explored in the string var position: Int = 0 + # Cached position (bytes) in the NativeString underlying the String var bytepos: Int = first_byte is lateinit - redef var length is lazy do - if bytelen == 0 then return 0 - var st = first_byte - var its = items - var ln = 0 - var lst = last_byte - while st <= lst do - st += its.length_of_char_at(st) - ln += 1 - end - return ln - end - - redef fun [](index) do return items.char_at(char_to_byte_index(index)) - # Index of the character `index` in `items` private fun char_to_byte_index(index: Int): Int do var ln = length @@ -107,6 +84,37 @@ class FlatString return ns_i end + redef fun [](index) do return items.char_at(char_to_byte_index(index)) +end + +# Immutable strings of characters. +class FlatString + super FlatText + super String + + # Index at which `self` begins in `items`, inclusively + redef var first_byte is noinit + + # Index at which `self` ends in `items`, inclusively + redef var last_byte is noinit + + redef var chars = new FlatStringCharView(self) is lazy + + redef var bytes = new FlatStringByteView(self) is lazy + + redef var length is lazy do + if bytelen == 0 then return 0 + var st = first_byte + var its = items + var ln = 0 + var lst = last_byte + while st <= lst do + st += its.length_of_char_at(st) + ln += 1 + end + return ln + end + redef fun reversed do var b = new FlatBuffer.with_capacity(bytelen + 1) @@ -280,7 +288,7 @@ class FlatString var mifrom = first_byte if s isa FlatText then var sits = s.items - var sifrom = s.as(FlatString).first_byte + var sifrom = s.first_byte var ns = new NativeString(nlen + 1) mits.copy_to(ns, mlen, mifrom, 0) sits.copy_to(ns, slen, sifrom, mlen) @@ -471,23 +479,15 @@ class FlatBuffer redef var chars: Sequence[Char] = new FlatBufferCharView(self) is lazy - redef var bytes: Sequence[Byte] = new FlatBufferByteView(self) is lazy + redef var bytes = new FlatBufferByteView(self) is lazy redef var bytelen = 0 - # O(n) - redef fun length do - var max = bytelen - if max == 0 then return 0 - var pos = 0 - var ln = 0 - var its = items - while pos < max do - pos += its.length_of_char_at(pos) - ln += 1 - end - return ln - end + redef var length = 0 + + private var char_cache: Int = -1 + + private var byte_cache: Int = -1 private var capacity = 0 @@ -527,12 +527,6 @@ class FlatBuffer items.copy_to(items, bytelen - from, from, from - len) end - redef fun [](i) - do - assert i < length and i >= 0 - return items.char_at(items.char_to_byte_index(i)) - end - redef fun []=(index, item) do assert index >= 0 and index <= length @@ -553,6 +547,7 @@ class FlatBuffer lshift_bytes(ip + clen, -size_diff) end bytelen += size_diff + bytepos += size_diff items.set_char_at(ip, item) end @@ -564,21 +559,14 @@ class FlatBuffer enlarge(bytelen + clen) items.set_char_at(bytelen, c) bytelen += clen - end - - private fun add_byte(b: Byte) do - if written then reset - is_dirty = true - enlarge(bytelen + 1) - items[bytelen] = b - # FIXME: Might trigger errors - bytelen += 1 + length += 1 end redef fun clear do is_dirty = true if written then reset bytelen = 0 + length = 0 end redef fun empty do return new Buffer @@ -626,11 +614,12 @@ class FlatBuffer # # If `items` is shared, `written` should be set to true after the creation # so that a modification will do a copy-on-write. - private init with_infos(items: NativeString, capacity, bytelen: Int) + private init with_infos(items: NativeString, capacity, bytelen, length: Int) do self.items = items self.capacity = capacity self.bytelen = bytelen + self.length = length end # Create a new string copied from `s`. @@ -643,6 +632,7 @@ class FlatBuffer for i in substrings do i.as(FlatString).items.copy_to(items, i.bytelen, 0, 0) end bytelen = s.bytelen + length = s.length capacity = s.bytelen written = true end @@ -662,15 +652,14 @@ class FlatBuffer is_dirty = true var sl = s.bytelen enlarge(bytelen + sl) - if s isa FlatString then + if s isa FlatText then s.items.copy_to(items, sl, s.first_byte, bytelen) - else if s isa FlatBuffer then - s.items.copy_to(items, sl, 0, bytelen) else for i in s.substrings do append i return end bytelen += sl + length += s.length end # Copies the content of self in `dest` @@ -695,7 +684,7 @@ class FlatBuffer var byte_length = byteto - bytefrom + 1 var r_items = new NativeString(byte_length) items.copy_to(r_items, byte_length, bytefrom, 0) - return new FlatBuffer.with_infos(r_items, byte_length, byte_length) + return new FlatBuffer.with_infos(r_items, byte_length, byte_length, count) else return new Buffer end @@ -761,39 +750,6 @@ private class FlatBufferByteView redef fun [](index) do return target.items[index] - redef fun []=(index, item) - do - assert index >= 0 and index <= target.bytelen - if index == target.bytelen then - add(item) - return - end - target.items[index] = item - end - - redef fun push(c) - do - target.add_byte(c) - end - - fun enlarge(cap: Int) - do - target.enlarge(cap) - end - - redef fun append(s) - do - var s_length = s.length - if target.capacity < (target.length + s_length) then enlarge(s_length + target.length) - var pos = target.length - var its = target.items - for i in s do - its[pos] = i - pos += 1 - end - target.length += s.length - end - redef fun iterator_from(pos) do return new FlatBufferByteIterator.with_pos(target, pos) redef fun reverse_iterator_from(pos) do return new FlatBufferByteReverseIterator.with_pos(target, pos) diff --git a/lib/standard/text/ropes.nit b/lib/standard/text/ropes.nit index 08611b7..2b3ff28 100644 --- a/lib/standard/text/ropes.nit +++ b/lib/standard/text/ropes.nit @@ -270,7 +270,7 @@ class RopeBuffer redef var chars: Sequence[Char] is lazy do return new RopeBufferChars(self) - redef var bytes: Sequence[Byte] is lazy do return new RopeBufferBytes(self) + redef var bytes is lazy do return new RopeBufferBytes(self) # The final string being built on the fly private var str: String = "" @@ -281,6 +281,9 @@ class RopeBuffer # Next available (e.g. unset) character in the `Buffer` private var rpos = 0 + # Length (in chars) of the buffered part + private var nslen = 0 + # Keeps track of the buffer's currently dumped part # # This might happen if for instance, a String was being @@ -436,7 +439,7 @@ class RopeBuffer end if s isa FlatText then var oits = s.items - var from = if s isa FlatString then s.first_byte else 0 + var from = s.first_byte var remsp = buf_size - rpos if slen <= remsp then oits.copy_to(ns, slen, from, rpos) @@ -467,18 +470,6 @@ class RopeBuffer rpos = rp end - private fun add_byte(b: Byte) do - var rp = rpos - if rp >= buf_size then - dump_buffer - rp = 0 - end - ns[rp] = b - rp += 1 - bytelen += 1 - rpos = rp - end - # Converts the Buffer to a FlatString, appends it to # the final String and re-allocates a new larger Buffer. private fun dump_buffer do @@ -1237,23 +1228,6 @@ class RopeBufferBytes end end - redef fun []=(i,c) do - if i == target.length then target.add_byte c - if i < target.str.length then - # FIXME: Will need to be optimized and rewritten with Unicode - var s = target.str - var l = s.substring(0, i) - var r = s.substring_from(i + 1) - target.str = l + c.to_i.ascii.to_s + r - else - target.ns[i - target.str.length] = c - end - end - - redef fun add(c) do target.add_byte c - - redef fun push(c) do target.add_byte c - redef fun iterator_from(i) do return new RopeBufferByteIterator.from(target, i) redef fun reverse_iterator_from(i) do return new RopeBufferByteReverseIterator.from(target, i) diff --git a/tests/sav/test_string_bytes.res b/tests/sav/test_string_bytes.res index 8d98756..2984735 100644 --- a/tests/sav/test_string_bytes.res +++ b/tests/sav/test_string_bytes.res @@ -4,9 +4,7 @@ [0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54] [0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54] [0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54] -This string is cool -This string is coolA -Ahis string is coolA -This string is cool -This string is coolA -Ahis string is coolA +[0x54,0x68,0x69,0x73,0x20,0x73,0x74,0x72,0x69,0x6e,0x67,0x20,0x69,0x73,0x20,0x63,0x6f,0x6f,0x6c] +[0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54] +[0x54,0x68,0x69,0x73,0x20,0x73,0x74,0x72,0x69,0x6e,0x67,0x20,0x69,0x73,0x20,0x63,0x6f,0x6f,0x6c] +[0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54] diff --git a/tests/test_string_bytes.nit b/tests/test_string_bytes.nit index 875f5dc..13048d8 100644 --- a/tests/test_string_bytes.nit +++ b/tests/test_string_bytes.nit @@ -28,24 +28,10 @@ print z.bytes.reverse_iterator.to_a var b = new FlatBuffer.from(x) -print b - -b.bytes.add 0x41u8 - -print b - -b.bytes[0] = 0x41u8 - -print b +print b.bytes.to_a +print b.bytes.reverse_iterator.to_a var c = new RopeBuffer.from(x) -print c - -c.bytes.add 0x41u8 - -print c - -c.bytes[0] = 0x41u8 - -print c +print c.bytes +print c.bytes.reverse_iterator.to_a