Merge: Basename fix
authorJean Privat <jean@pryen.org>
Mon, 28 Sep 2015 14:42:57 +0000 (10:42 -0400)
committerJean Privat <jean@pryen.org>
Mon, 28 Sep 2015 14:42:57 +0000 (10:42 -0400)
Should close #1736 if performances are adequate for @privat.

This PR optimizes some methods in both `SequenceRead` and `FlatString`, on the test program exposed in #1736, the valgrind runtime has gone from 418,872,526 Ir to 136,522,095 Ir (~67.5% improvement)

Some more improvements could be observed if we could get rid of some useless Boxes in `NativeString::to_s_with_copy` because of a `is_same_instance` which boxes NativeStrings (24k allocations in total to remove).

Pull-Request: #1740
Reviewed-by: Jean Privat <jean@pryen.org>

lib/core/collection/abstract_collection.nit
lib/core/file.nit
lib/core/text/abstract_text.nit
lib/core/text/flat.nit
tests/sav/test_basename_perf.res [new file with mode: 0644]
tests/sav/test_basename_perf_args1.res [new file with mode: 0644]
tests/test_basename_perf.args [new file with mode: 0644]
tests/test_basename_perf.nit [new file with mode: 0644]

index aca4fdc..6b07149 100644 (file)
@@ -853,18 +853,13 @@ interface SequenceRead[E]
        #     assert a.last_index_of_from(20, 2)   == 1
        #     assert a.last_index_of_from(20, 1)   == 1
        #     assert a.last_index_of_from(20, 0)   == -1
-       fun last_index_of_from(item: nullable Object, pos: Int): Int
-       do
-               var res = -1
-               var p = 0
-               var i = iterator
-               while i.is_ok do
-                       if p>pos then break
-                       if i.item == item then res = p
-                       i.next
-                       p += 1
+       fun last_index_of_from(item: nullable Object, pos: Int): Int do
+               var i = pos
+               while i >= 0 do
+                       if self[i] == item then return i
+                       i -= 1
                end
-               return res
+               return -1
        end
 
        # Two sequences are equals if they have the same items in the same order.
index aba34e8..03955eb 100644 (file)
@@ -1265,6 +1265,34 @@ redef class FlatString
        do
                s.write_native(items, first_byte, bytelen)
        end
+
+       redef fun file_extension do
+               var its = _items
+               var p = _last_byte
+               var c = its[p]
+               var st = _first_byte
+               while p >= st and c != '.'.ascii do
+                       p -= 1
+                       c = its[p]
+               end
+               if p <= st then return null
+               var ls = _last_byte
+               return new FlatString.with_infos(its, ls - p, p + 1, ls)
+       end
+
+       redef fun basename(extension) do
+               var l = _last_byte
+               var its = _items
+               var min = _first_byte
+               var sl = '/'.ascii
+               while l > min and its[l] == sl do l -= 1
+               if l == min then return "/"
+               var ns = l
+               while ns >= min and its[ns] != sl do ns -= 1
+               var bname = new FlatString.with_infos(its, l - ns, ns + 1, l)
+
+               return if extension != null then bname.strip_extension(extension) else bname
+       end
 end
 
 redef class NativeString
index 63fb920..01d0eab 100644 (file)
@@ -146,15 +146,7 @@ abstract class Text
        # Returns -1 if not found
        #
        # DEPRECATED : Use self.chars.last_index_of_from instead
-       fun last_index_of_from(item: Char, pos: Int): Int
-       do
-               var iter = self.chars.reverse_iterator_from(pos)
-               while iter.is_ok do
-                       if iter.item == item then return iter.index
-                       iter.next
-               end
-               return -1
-       end
+       fun last_index_of_from(item: Char, pos: Int): Int do return chars.last_index_of_from(item, pos)
 
        # Gets an iterator on the chars of self
        #
@@ -983,9 +975,6 @@ abstract class FlatText
        # if set before using it.
        private var items: NativeString is noinit
 
-       # Real items, used as cache for to_cstring is called
-       private var real_items: nullable NativeString = null
-
        # Returns a char* starting at position `first_byte`
        #
        # WARNING: If you choose to use this service, be careful of the following.
index 4d2ff48..5bc76fc 100644 (file)
@@ -200,6 +200,14 @@ class FlatString
                return _items.utf8_length(_first_byte, _last_byte)
        end
 
+       redef var to_cstring is lazy do
+               var blen = _bytelen
+               var new_items = new NativeString(blen + 1)
+               _items.copy_to(new_items, blen, _first_byte, 0)
+               new_items[blen] = 0u8
+               return new_items
+       end
+
        redef fun reversed
        do
                var b = new FlatBuffer.with_capacity(_bytelen + 1)
@@ -304,16 +312,6 @@ class FlatString
                _bytepos = from
        end
 
-       redef fun to_cstring do
-               if real_items != null then return real_items.as(not null)
-               var blen = _bytelen
-               var new_items = new NativeString(blen + 1)
-               _items.copy_to(new_items, blen, _first_byte, 0)
-               new_items[blen] = 0u8
-               real_items = new_items
-               return new_items
-       end
-
        redef fun ==(other)
        do
                if not other isa FlatString then return super
@@ -582,6 +580,9 @@ class FlatBuffer
 
        private var capacity = 0
 
+       # Real items, used as cache for when to_cstring is called
+       private var real_items: NativeString is noinit
+
        redef fun fast_cstring do return _items.fast_cstring(0)
 
        redef fun substrings do return new FlatSubstringsIter(self)
@@ -702,7 +703,7 @@ class FlatBuffer
                        real_items = new_native
                        is_dirty = false
                end
-               return real_items.as(not null)
+               return real_items
        end
 
        # Create a new empty string.
@@ -1002,7 +1003,7 @@ redef class NativeString
                copy_to(new_self, length, 0, 0)
                var str = new FlatString.with_infos(new_self, length, 0, length - 1)
                new_self[length] = 0u8
-               str.real_items = new_self
+               str.to_cstring = new_self
                return str
        end
 
diff --git a/tests/sav/test_basename_perf.res b/tests/sav/test_basename_perf.res
new file mode 100644 (file)
index 0000000..ba11526
--- /dev/null
@@ -0,0 +1 @@
+Usage: ./test paths
diff --git a/tests/sav/test_basename_perf_args1.res b/tests/sav/test_basename_perf_args1.res
new file mode 100644 (file)
index 0000000..25574f7
--- /dev/null
@@ -0,0 +1,13 @@
+ population: 3
+ minimum value: 2
+ maximum value: 11
+ total value: 16
+ average value: 5.33
+ distribution:
+  <=2: sub-population=1 (33.33%); cumulated value=2 (12.50%)
+  <=4: sub-population=1 (33.33%); cumulated value=3 (18.75%)
+  <=16: sub-population=1 (33.33%); cumulated value=11 (68.75%)
+ list:
+  nit: 11 (68.75%)
+  : 3 (18.75%)
+  ini: 2 (12.50%)
diff --git a/tests/test_basename_perf.args b/tests/test_basename_perf.args
new file mode 100644 (file)
index 0000000..dfdb261
--- /dev/null
@@ -0,0 +1 @@
+project1
diff --git a/tests/test_basename_perf.nit b/tests/test_basename_perf.nit
new file mode 100644 (file)
index 0000000..6d41cb3
--- /dev/null
@@ -0,0 +1,35 @@
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import counter
+
+var ext_counter = new Counter[String]
+var todo = new Array[String]
+if args.is_empty then
+       print "Usage: ./test paths"
+       exit -1
+else
+       todo.add_all args
+end
+while todo.not_empty do
+       var file = todo.pop
+       var ext = file.basename.file_extension or else ""
+       ext_counter.inc ext
+       var entries = file.files
+       for entry in entries do
+               todo.add file / entry
+       end
+end
+ext_counter.print_summary
+ext_counter.print_elements(10)