From: Jean Privat Date: Mon, 28 Sep 2015 14:42:57 +0000 (-0400) Subject: Merge: Basename fix X-Git-Tag: v0.7.8~1 X-Git-Url: http://nitlanguage.org?hp=183dc9b27644a9b1ee43843032867d0a7a96872d Merge: Basename fix Should close #1736 if performances are adequate for @privat. This PR optimizes some methods in both `SequenceRead` and `FlatString`, on the test program exposed in #1736, the valgrind runtime has gone from 418,872,526 Ir to 136,522,095 Ir (~67.5% improvement) Some more improvements could be observed if we could get rid of some useless Boxes in `NativeString::to_s_with_copy` because of a `is_same_instance` which boxes NativeStrings (24k allocations in total to remove). Pull-Request: #1740 Reviewed-by: Jean Privat --- diff --git a/lib/core/collection/abstract_collection.nit b/lib/core/collection/abstract_collection.nit index aca4fdc..6b07149 100644 --- a/lib/core/collection/abstract_collection.nit +++ b/lib/core/collection/abstract_collection.nit @@ -853,18 +853,13 @@ interface SequenceRead[E] # assert a.last_index_of_from(20, 2) == 1 # assert a.last_index_of_from(20, 1) == 1 # assert a.last_index_of_from(20, 0) == -1 - fun last_index_of_from(item: nullable Object, pos: Int): Int - do - var res = -1 - var p = 0 - var i = iterator - while i.is_ok do - if p>pos then break - if i.item == item then res = p - i.next - p += 1 + fun last_index_of_from(item: nullable Object, pos: Int): Int do + var i = pos + while i >= 0 do + if self[i] == item then return i + i -= 1 end - return res + return -1 end # Two sequences are equals if they have the same items in the same order. diff --git a/lib/core/file.nit b/lib/core/file.nit index aba34e8..03955eb 100644 --- a/lib/core/file.nit +++ b/lib/core/file.nit @@ -1265,6 +1265,34 @@ redef class FlatString do s.write_native(items, first_byte, bytelen) end + + redef fun file_extension do + var its = _items + var p = _last_byte + var c = its[p] + var st = _first_byte + while p >= st and c != '.'.ascii do + p -= 1 + c = its[p] + end + if p <= st then return null + var ls = _last_byte + return new FlatString.with_infos(its, ls - p, p + 1, ls) + end + + redef fun basename(extension) do + var l = _last_byte + var its = _items + var min = _first_byte + var sl = '/'.ascii + while l > min and its[l] == sl do l -= 1 + if l == min then return "/" + var ns = l + while ns >= min and its[ns] != sl do ns -= 1 + var bname = new FlatString.with_infos(its, l - ns, ns + 1, l) + + return if extension != null then bname.strip_extension(extension) else bname + end end redef class NativeString diff --git a/lib/core/text/abstract_text.nit b/lib/core/text/abstract_text.nit index 63fb920..01d0eab 100644 --- a/lib/core/text/abstract_text.nit +++ b/lib/core/text/abstract_text.nit @@ -146,15 +146,7 @@ abstract class Text # Returns -1 if not found # # DEPRECATED : Use self.chars.last_index_of_from instead - fun last_index_of_from(item: Char, pos: Int): Int - do - var iter = self.chars.reverse_iterator_from(pos) - while iter.is_ok do - if iter.item == item then return iter.index - iter.next - end - return -1 - end + fun last_index_of_from(item: Char, pos: Int): Int do return chars.last_index_of_from(item, pos) # Gets an iterator on the chars of self # @@ -983,9 +975,6 @@ abstract class FlatText # if set before using it. private var items: NativeString is noinit - # Real items, used as cache for to_cstring is called - private var real_items: nullable NativeString = null - # Returns a char* starting at position `first_byte` # # WARNING: If you choose to use this service, be careful of the following. diff --git a/lib/core/text/flat.nit b/lib/core/text/flat.nit index 4d2ff48..5bc76fc 100644 --- a/lib/core/text/flat.nit +++ b/lib/core/text/flat.nit @@ -200,6 +200,14 @@ class FlatString return _items.utf8_length(_first_byte, _last_byte) end + redef var to_cstring is lazy do + var blen = _bytelen + var new_items = new NativeString(blen + 1) + _items.copy_to(new_items, blen, _first_byte, 0) + new_items[blen] = 0u8 + return new_items + end + redef fun reversed do var b = new FlatBuffer.with_capacity(_bytelen + 1) @@ -304,16 +312,6 @@ class FlatString _bytepos = from end - redef fun to_cstring do - if real_items != null then return real_items.as(not null) - var blen = _bytelen - var new_items = new NativeString(blen + 1) - _items.copy_to(new_items, blen, _first_byte, 0) - new_items[blen] = 0u8 - real_items = new_items - return new_items - end - redef fun ==(other) do if not other isa FlatString then return super @@ -582,6 +580,9 @@ class FlatBuffer private var capacity = 0 + # Real items, used as cache for when to_cstring is called + private var real_items: NativeString is noinit + redef fun fast_cstring do return _items.fast_cstring(0) redef fun substrings do return new FlatSubstringsIter(self) @@ -702,7 +703,7 @@ class FlatBuffer real_items = new_native is_dirty = false end - return real_items.as(not null) + return real_items end # Create a new empty string. @@ -1002,7 +1003,7 @@ redef class NativeString copy_to(new_self, length, 0, 0) var str = new FlatString.with_infos(new_self, length, 0, length - 1) new_self[length] = 0u8 - str.real_items = new_self + str.to_cstring = new_self return str end diff --git a/tests/sav/test_basename_perf.res b/tests/sav/test_basename_perf.res new file mode 100644 index 0000000..ba11526 --- /dev/null +++ b/tests/sav/test_basename_perf.res @@ -0,0 +1 @@ +Usage: ./test paths diff --git a/tests/sav/test_basename_perf_args1.res b/tests/sav/test_basename_perf_args1.res new file mode 100644 index 0000000..25574f7 --- /dev/null +++ b/tests/sav/test_basename_perf_args1.res @@ -0,0 +1,13 @@ + population: 3 + minimum value: 2 + maximum value: 11 + total value: 16 + average value: 5.33 + distribution: + <=2: sub-population=1 (33.33%); cumulated value=2 (12.50%) + <=4: sub-population=1 (33.33%); cumulated value=3 (18.75%) + <=16: sub-population=1 (33.33%); cumulated value=11 (68.75%) + list: + nit: 11 (68.75%) + : 3 (18.75%) + ini: 2 (12.50%) diff --git a/tests/test_basename_perf.args b/tests/test_basename_perf.args new file mode 100644 index 0000000..dfdb261 --- /dev/null +++ b/tests/test_basename_perf.args @@ -0,0 +1 @@ +project1 diff --git a/tests/test_basename_perf.nit b/tests/test_basename_perf.nit new file mode 100644 index 0000000..6d41cb3 --- /dev/null +++ b/tests/test_basename_perf.nit @@ -0,0 +1,35 @@ +# This file is part of NIT ( http://www.nitlanguage.org ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import counter + +var ext_counter = new Counter[String] +var todo = new Array[String] +if args.is_empty then + print "Usage: ./test paths" + exit -1 +else + todo.add_all args +end +while todo.not_empty do + var file = todo.pop + var ext = file.basename.file_extension or else "" + ext_counter.inc ext + var entries = file.files + for entry in entries do + todo.add file / entry + end +end +ext_counter.print_summary +ext_counter.print_elements(10)