Merge: Better nitcc
authorJean Privat <jean@pryen.org>
Wed, 9 Sep 2015 15:04:17 +0000 (11:04 -0400)
committerJean Privat <jean@pryen.org>
Wed, 9 Sep 2015 15:04:17 +0000 (11:04 -0400)
Some improvements and bugfixes for nitcc.

* Improve code size in C and in Nit
* Produce valid Nit code even in case of R/R, S/R or S/R/R conflicts. Close #1693

Before: compiling objc_test_parser on an old laptop
* 880.58 usertime
* 3GB memory
* produced executable: 50MB (31MB stripped)

after: same grammar, better nitcc
* 337.80usertime
* 788MB memory
* produced executable: 28MB (8,8MB stripped)

Pull-Request: #1694
Reviewed-by: Alexis Laferrière <alexis.laf@xymus.net>
Reviewed-by: Lucas Bajolet <r4pass@hotmail.com>

lib/counter.nit
lib/text_stat.nit [new file with mode: 0644]
tests/niti.skip
tests/nitvm.skip
tests/sav/nitcg/test_text_stat.res [new file with mode: 0644]
tests/sav/test_text_stat.res [new file with mode: 0644]
tests/test_text_stat.nit [new file with mode: 0644]

index 644c115..9a539e6 100644 (file)
@@ -266,6 +266,20 @@ class Counter[E]
                end
                return res
        end
+
+       # Prints the content of the counter along with statistics
+       #
+       # Content is printed in order (if available) from lowest to highest on the keys.
+       # Else, it is printed as-is
+       fun print_content do
+               var a = keys.to_a
+               if a isa Array[Comparable] then default_comparator.sort(a)
+               var subtotal = 0
+               for i in a do
+                       subtotal += self[i]
+                       printn("* ", i or else "null", " = ", self[i], " => occurences ", self[i].to_f / sum.to_f * 100.0, "%, cumulative ", subtotal.to_f / sum.to_f * 100.0, "% \n")
+               end
+       end
 end
 
 redef class Collection[E]
diff --git a/lib/text_stat.nit b/lib/text_stat.nit
new file mode 100644 (file)
index 0000000..f09a454
--- /dev/null
@@ -0,0 +1,375 @@
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# This file is free software, which comes along with NIT.  This software is
+# distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+# without  even  the implied warranty of  MERCHANTABILITY or  FITNESS FOR A
+# PARTICULAR PURPOSE.  You can modify it is you want,  provided this header
+# is kept unaltered, and a notification of the changes is added.
+# You  are  allowed  to  redistribute it and sell it, alone or is a part of
+# another product.
+
+# Injects stat-calculating functionalities to Text and its variants
+#
+# Every allocation is counted for each available type of Text in Core
+#
+# Cached operations are monitored and statistics of their use are printed
+# at the end of the execution of a program
+module text_stat
+
+intrude import core::text::ropes
+import counter
+
+redef class Sys
+
+       # Counts the number of allocations of FlatString
+       var flatstr_allocations = 0
+
+       # Counts the number of allocations of FlatBuffer
+       var flatbuf_allocations = 0
+
+       # Counts the number of allocations of Concat
+       var concat_allocations = 0
+
+       # Counts the number of allocations of RopeBuffer
+       var ropebuf_allocations = 0
+
+       # Counts the number of calls to property length
+       var length_calls = new Counter[String]
+
+       # Counts the number of length calls that missed the cache
+       var length_cache_miss = new Counter[String]
+
+       # Counts the number of call to index on a Text
+       var index_call = new Counter[String]
+
+       # Count the number of times that an indexed access
+       # on a Concat caused a regeneration of the cache
+       var concat_cache_miss = 0
+
+       # Distance between characters when looking for a character in a FlatString
+       var index_len = new Counter[Int]
+
+       # Length (bytes) of the FlatString created by lib
+       var str_bytelen = new Counter[Int]
+
+       # Counter of the times `bytelen` is called on FlatString
+       var bytelen_call = new Counter[String]
+
+       # Counter of the times `bytepos` is called on each type of receiver
+       var position_call = new Counter[String]
+
+       # Counter of the times `bytepos` is called on each type of receiver
+       var bytepos_call = new Counter[String]
+
+       # Calls to the `first_byte` property of a FlatString
+       var first_byte_call = 0
+
+       # Calls to the `last_byte` property of a FlatString
+       var last_byte_call = 0
+
+       # Number of strings created with full length created
+       var str_full_created = 0
+
+       private fun show_string_stats do
+               print """
+Usage of Strings:
+
+Allocations, by type:
+               """
+               print "\t-FlatString = {flatstr_allocations}"
+               print "\t-FlatBuffer = {flatbuf_allocations}"
+               print "\t-Concat = {concat_allocations}"
+               print "\t-RopeBuffer = {ropebuf_allocations}"
+               print ""
+               print "Calls to length, by type:"
+               for k, v in length_calls do
+                       printn "\t{k} = {v}"
+                       if k == "FlatString" then printn " (cache misses {length_cache_miss[k]}, {div(length_cache_miss[k] * 100, v)}%)"
+                       printn "\n"
+               end
+               print "Indexed accesses, by type:"
+               for k, v in index_call do
+                       printn "\t{k} = {v}"
+                       if k == "Concat" then printn " (cache misses {concat_cache_miss}, {div(concat_cache_miss * 100, v)}%)"
+                       printn "\n"
+               end
+
+               print "Calls to bytelen for each type:"
+               for k, v in bytelen_call do
+                       print "\t{k} = {v}"
+               end
+
+               print "Calls to position for each type:"
+               for k, v in position_call do
+                       print "\t{k} = {v}"
+               end
+
+               print "Calls to bytepos for each type:"
+               for k, v in bytepos_call do
+                       print "\t{k} = {v}"
+               end
+
+               print "Calls to first_byte on FlatString {first_byte_call}"
+               print "Calls to last_byte on FlatString {last_byte_call}"
+
+               print "FlatStrings allocated with length {str_full_created} ({str_full_created.to_f/flatstr_allocations.to_f * 100.0 }%)"
+
+               print "Length of travel for index distribution:"
+               index_len.print_content
+
+               print "Byte length of the FlatStrings created:"
+               str_bytelen.print_content
+       end
+
+       redef fun run do
+               super
+               show_string_stats
+       end
+end
+
+redef fun exit(i) do
+       show_string_stats
+       super
+end
+
+redef class Concat
+       init do
+               sys.concat_allocations += 1
+       end
+
+       redef fun bytelen do
+               sys.bytelen_call.inc "Concat"
+               return super
+       end
+
+       redef fun [](i) do
+               sys.index_call.inc "Concat"
+               if flat_last_pos_start != -1 then
+                       var fsp = i - flat_last_pos_start
+                       if fsp >= 0 and fsp < flat_cache.length then return flat_cache[fsp]
+               end
+               sys.concat_cache_miss += 1
+               var s: String = self
+               var st = i
+               loop
+                       if s isa FlatString then break
+                       s = s.as(Concat)
+                       var lft = s.left
+                       var llen = lft.length
+                       if i >= llen then
+                               s = s.right
+                               i -= llen
+                       else
+                               s = s.left
+                       end
+               end
+               flat_last_pos_start = st - i
+               flat_cache = s
+               return s[i]
+       end
+end
+
+redef class FlatText
+       redef fun char_to_byte_index(index) do
+               var ln = length
+               assert index >= 0
+               assert index < ln
+
+               # Find best insertion point
+               var delta_begin = index
+               var delta_end = (ln - 1) - index
+               var delta_cache = (position - index).abs
+               var min = delta_begin
+               var its = items
+
+               if delta_cache < min then min = delta_cache
+               if delta_end < min then min = delta_end
+
+               var ns_i: Int
+               var my_i: Int
+
+               if min == delta_begin then
+                       ns_i = first_byte
+                       my_i = 0
+               else if min == delta_cache then
+                       ns_i = bytepos
+                       my_i = position
+               else
+                       ns_i = its.find_beginning_of_char_at(last_byte)
+                       my_i = length - 1
+               end
+
+               var from = ns_i
+
+               ns_i = its.char_to_byte_index_cached(index, my_i, ns_i)
+
+               var after = ns_i
+
+               sys.index_len.inc((after - from).abs)
+
+               position = index
+               bytepos = ns_i
+
+               return ns_i
+       end
+end
+
+redef class RopeBuffer
+       init do
+               sys.ropebuf_allocations += 1
+       end
+
+       redef fun bytelen do
+               sys.bytelen_call.inc "RopeBuffer"
+               return super
+       end
+
+       redef fun [](i) do
+               sys.index_call.inc "RopeBuffer"
+               return super
+       end
+end
+
+redef class FlatBuffer
+
+       init do
+               sys.flatbuf_allocations += 1
+       end
+
+       redef fun bytepos do
+               sys.bytepos_call.inc "FlatBuffer"
+               return super
+       end
+
+       redef fun bytepos=(p) do
+               sys.bytepos_call.inc "FlatBuffer"
+               super
+       end
+
+       redef fun position do
+               sys.position_call.inc "FlatBuffer"
+               return super
+       end
+
+       redef fun position=(p) do
+               sys.position_call.inc "FlatBuffer"
+               super
+       end
+
+       redef fun bytelen do
+               sys.bytelen_call.inc "FlatBuffer"
+               return super
+       end
+
+       redef fun length do
+               sys.length_calls.inc "FlatBuffer"
+               return super
+       end
+
+       redef fun [](i) do
+               sys.index_call.inc "FlatBuffer"
+               return super
+       end
+
+       redef fun char_to_byte_index(i) do
+               sys.index_call.inc "FlatBuffer"
+               return super
+       end
+end
+
+redef class FlatString
+
+       redef fun bytepos do
+               sys.bytepos_call.inc "FlatString"
+               return super
+       end
+
+       redef fun bytepos=(p) do
+               sys.bytepos_call.inc "FlatString"
+               super
+       end
+
+       redef fun position do
+               sys.position_call.inc "FlatString"
+               return super
+       end
+
+       redef fun position=(p) do
+               sys.position_call.inc "FlatString"
+               super
+       end
+
+       redef fun bytelen do
+               sys.bytelen_call.inc "FlatString"
+               return super
+       end
+
+       redef fun first_byte do
+               sys.first_byte_call += 1
+               return super
+       end
+
+       redef fun first_byte=(v) do
+               sys.first_byte_call += 1
+               super
+       end
+
+       redef fun last_byte do
+               sys.last_byte_call += 1
+               return super
+       end
+
+       redef fun last_byte=(v) do
+               sys.last_byte_call += 1
+               super
+       end
+
+       init do
+               sys.flatstr_allocations += 1
+       end
+
+       redef init with_infos(items, bytelen, from, to)
+       do
+               self.items = items
+               self.bytelen = bytelen
+               sys.str_bytelen.inc bytelen
+               first_byte = from
+               last_byte = to
+       end
+
+       redef init full(items, bytelen, from, to, length)
+       do
+               self.items = items
+               self.length = length
+               self.bytelen = bytelen
+               sys.str_bytelen.inc bytelen
+               sys.str_full_created += 1
+               first_byte = from
+               last_byte = to
+       end
+
+       private var length_cache: nullable Int = null
+
+       redef fun length do
+               sys.length_calls.inc "FlatString"
+               var l = length_cache
+               if l != null then return l
+               sys.length_cache_miss.inc "FlatString"
+               if bytelen == 0 then return 0
+               var st = first_byte
+               var its = items
+               var ln = 0
+               var lst = last_byte
+               while st <= lst do
+                       st += its.length_of_char_at(st)
+                       ln += 1
+               end
+               length_cache = ln
+               return ln
+       end
+
+       redef fun char_to_byte_index(i) do
+               sys.index_call.inc "FlatString"
+               return super
+       end
+end
index 3e10daa..ef83f4d 100644 (file)
@@ -31,3 +31,4 @@ fibonacci_word
 shootout_nsieve
 test_ropebuffer
 ui_test
+test_text_stat
index 3e10daa..ef83f4d 100644 (file)
@@ -31,3 +31,4 @@ fibonacci_word
 shootout_nsieve
 test_ropebuffer
 ui_test
+test_text_stat
diff --git a/tests/sav/nitcg/test_text_stat.res b/tests/sav/nitcg/test_text_stat.res
new file mode 100644 (file)
index 0000000..1066456
--- /dev/null
@@ -0,0 +1,58 @@
+Hello world !
+Usage of Strings:
+
+Allocations, by type:
+               
+       -FlatString = 32
+       -FlatBuffer = 2
+       -Concat = 0
+       -RopeBuffer = 0
+
+Calls to length, by type:
+       FlatString = 36 (cache misses 8, 22.22%)
+       FlatBuffer = 4
+Indexed accesses, by type:
+       FlatString = 17
+Calls to bytelen for each type:
+       FlatString = 70
+Calls to position for each type:
+       FlatString = 35
+Calls to bytepos for each type:
+       FlatString = 18
+Calls to first_byte on FlatString 153
+Calls to last_byte on FlatString 103
+FlatStrings allocated with length 81 (85.417%)
+Length of travel for index distribution:
+* null = 20 => occurences 83.333%, cumulative 83.333% 
+* 1 = 8 => occurences 21.053%, cumulative 73.684% 
+Byte length of the FlatStrings created:
+* null = 6 => occurences 4.286%, cumulative 4.286% 
+* 1 = 25 => occurences 16.234%, cumulative 20.13% 
+* 2 = 31 => occurences 18.452%, cumulative 36.905% 
+* 3 = 30 => occurences 16.393%, cumulative 50.273% 
+* 4 = 4 => occurences 2.02%, cumulative 48.485% 
+* 5 = 19 => occurences 8.92%, cumulative 53.991% 
+* 6 = 26 => occurences 11.404%, cumulative 61.842% 
+* 9 = 1 => occurences 0.412%, cumulative 58.436% 
+* 10 = 10 => occurences 3.876%, cumulative 58.915% 
+* 11 = 2 => occurences 0.733%, cumulative 56.41% 
+* 12 = 1 => occurences 0.348%, cumulative 54.007% 
+* 13 = 1 => occurences 0.333%, cumulative 52.0% 
+* 14 = 1 => occurences 0.319%, cumulative 50.16% 
+* 15 = 6 => occurences 1.84%, cumulative 50.0% 
+* 16 = 7 => occurences 2.053%, cumulative 49.853% 
+* 17 = 1 => occurences 0.281%, cumulative 48.034% 
+* 25 = 2 => occurences 0.542%, cumulative 46.883% 
+* 26 = 1 => occurences 0.261%, cumulative 45.431% 
+* 31 = 2 => occurences 0.505%, cumulative 44.444% 
+* 32 = 1 => occurences 0.244%, cumulative 43.171% 
+* 33 = 1 => occurences 0.236%, cumulative 42.08% 
+* 34 = 2 => occurences 0.459%, cumulative 41.284% 
+* 36 = 1 => occurences 0.222%, cumulative 40.222% 
+* 37 = 1 => occurences 0.216%, cumulative 39.309% 
+* 39 = 1 => occurences 0.21%, cumulative 38.445% 
+* 40 = 1 => occurences 0.204%, cumulative 37.628% 
+* 43 = 1 => occurences 0.199%, cumulative 36.853% 
+* 46 = 1 => occurences 0.194%, cumulative 36.117% 
+* 51 = 15 => occurences 2.841%, cumulative 38.068% 
+* 55 = 1 => occurences 0.184%, cumulative 37.201% 
diff --git a/tests/sav/test_text_stat.res b/tests/sav/test_text_stat.res
new file mode 100644 (file)
index 0000000..49e9adc
--- /dev/null
@@ -0,0 +1,58 @@
+Hello world !
+Usage of Strings:
+
+Allocations, by type:
+               
+       -FlatString = 32
+       -FlatBuffer = 2
+       -Concat = 0
+       -RopeBuffer = 0
+
+Calls to length, by type:
+       FlatString = 36 (cache misses 8, 22.22%)
+       FlatBuffer = 4
+Indexed accesses, by type:
+       FlatString = 17
+Calls to bytelen for each type:
+       FlatString = 70
+Calls to position for each type:
+       FlatString = 35
+Calls to bytepos for each type:
+       FlatString = 18
+Calls to first_byte on FlatString 153
+Calls to last_byte on FlatString 103
+FlatStrings allocated with length 81 (85.417%)
+Length of travel for index distribution:
+* 0 = 20 => occurences 83.333%, cumulative 83.333% 
+* 1 = 8 => occurences 21.053%, cumulative 73.684% 
+Byte length of the FlatStrings created:
+* 0 = 6 => occurences 4.317%, cumulative 4.317% 
+* 1 = 25 => occurences 16.34%, cumulative 20.261% 
+* 2 = 31 => occurences 18.563%, cumulative 37.126% 
+* 3 = 30 => occurences 16.484%, cumulative 50.549% 
+* 4 = 3 => occurences 1.523%, cumulative 48.223% 
+* 5 = 20 => occurences 9.434%, cumulative 54.245% 
+* 6 = 26 => occurences 11.454%, cumulative 62.115% 
+* 9 = 1 => occurences 0.413%, cumulative 58.678% 
+* 10 = 10 => occurences 3.891%, cumulative 59.144% 
+* 11 = 2 => occurences 0.735%, cumulative 56.618% 
+* 12 = 1 => occurences 0.35%, cumulative 54.196% 
+* 13 = 1 => occurences 0.334%, cumulative 52.174% 
+* 14 = 1 => occurences 0.321%, cumulative 50.321% 
+* 15 = 6 => occurences 1.846%, cumulative 50.154% 
+* 16 = 7 => occurences 2.059%, cumulative 50.0% 
+* 17 = 1 => occurences 0.282%, cumulative 48.169% 
+* 25 = 2 => occurences 0.543%, cumulative 47.011% 
+* 26 = 1 => occurences 0.262%, cumulative 45.55% 
+* 31 = 2 => occurences 0.506%, cumulative 44.557% 
+* 32 = 1 => occurences 0.244%, cumulative 43.276% 
+* 33 = 1 => occurences 0.237%, cumulative 42.18% 
+* 34 = 2 => occurences 0.46%, cumulative 41.379% 
+* 36 = 1 => occurences 0.223%, cumulative 40.312% 
+* 37 = 1 => occurences 0.216%, cumulative 39.394% 
+* 39 = 1 => occurences 0.211%, cumulative 38.526% 
+* 40 = 1 => occurences 0.205%, cumulative 37.705% 
+* 43 = 1 => occurences 0.2%, cumulative 36.926% 
+* 46 = 1 => occurences 0.195%, cumulative 36.187% 
+* 51 = 16 => occurences 3.036%, cumulative 38.33% 
+* 52 = 5 => occurences 0.923%, cumulative 38.192% 
diff --git a/tests/test_text_stat.nit b/tests/test_text_stat.nit
new file mode 100644 (file)
index 0000000..3d18a92
--- /dev/null
@@ -0,0 +1,17 @@
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import text_stat
+
+print "Hello world !"