Merge: lib/core: Optimized `html_escape` for FlatText variants
author Jean Privat <jean@pryen.org>
Wed, 21 Oct 2015 01:10:06 +0000 (21:10 -0400)
committer Jean Privat <jean@pryen.org>
Wed, 21 Oct 2015 01:10:06 +0000 (21:10 -0400)
As @privat requested, this provides a byte-oriented, optimized version of `html_escape` for the `FlatText` variants, targeting `FlatString` first since it is the most common case. Like `escape_to_c`, it works at the byte level to accelerate the fetching of characters.

On a short test program:

~~~nit
var s = ""
for i in [0 .. 2000[ do s = "&lt;STRING&#47;&rt;".html_escape
~~~

Valgrind reports an old runtime of 18.208 MIr (8856 Ir per call to `html_escape`); the new runtime is 3.093 MIr (1298 Ir per call), roughly a 6.8× improvement per call.
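
The gain comes from scanning raw bytes instead of decoding UTF-8 characters: every character that `html_escape` must replace is a single ASCII byte, so a byte compare can never split a multibyte sequence. Below is a minimal sketch of the idea, not the actual patch (which writes into a preallocated `NativeString`); the name `html_escape_bytes` and the `FlatBuffer`/`ascii` API details are assumptions.

~~~nit
# Sketch: escape HTML special characters by scanning raw bytes.
fun html_escape_bytes(s: String): String
do
	var buf = new FlatBuffer
	for i in [0 .. s.bytelen[ do
		var b = s.bytes[i]
		if b == '&'.ascii then
			buf.append "&amp;"
		else if b == '<'.ascii then
			buf.append "&lt;"
		else if b == '>'.ascii then
			buf.append "&gt;"
		else if b == '"'.ascii then
			buf.append "&quot;"
		else if b == '\''.ascii then
			buf.append "&#39;"
		else if b == '/'.ascii then
			buf.append "&#47;"
		else
			# Unescaped byte: copied through unchanged.
			buf.add b.ascii
		end
	end
	return buf.to_s
end
~~~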

Pull-Request: #1775
Reviewed-by: Jean Privat <jean@pryen.org>
Reviewed-by: Alexis Laferrière <alexis.laf@xymus.net>

12 files changed:
lib/core/bytes.nit
lib/core/collection/hash_collection.nit
lib/hash_debug.nit
src/doc/doc_phases/doc_html.nit
src/doc/doc_phases/doc_phases.nit
src/doc/doc_phases/doc_test.nit [new file with mode: 0644]
src/loader.nit
src/nitdoc.nit
tests/nitdoc.args
tests/sav/test_hash_debug.res
tests/sav/test_test_phase_args2.res [new file with mode: 0644]
tests/test_test_phase.args

diff --git a/lib/core/bytes.nit b/lib/core/bytes.nit
index 227759a..94d3d27 100644
@@ -289,6 +289,20 @@ redef class Text
                end
                return ret
        end
+
+       # Gets the hexdigest of the bytes of `self`
+       #
+       #     assert "&lt;STRING&#47;&rt;".hexdigest == "266C743B535452494E47262334373B2672743B"
+       fun hexdigest: String do
+               var ln = bytelen
+               var outns = new NativeString(ln * 2)
+               var oi = 0
+               for i in [0 .. ln[ do
+                       bytes[i].add_digest_at(outns, oi)
+                       oi += 2
+               end
+               return new FlatString.with_infos(outns, ln * 2, 0, ln * 2 - 1)
+       end
 end
 
 redef class FlatText
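
As a quick illustration of the new `hexdigest` (example mine, not part of the patch): each input byte becomes two hexadecimal digits, so the result is exactly twice `bytelen` characters long.

~~~nit
assert "abc".hexdigest == "616263"
assert "abc".hexdigest.length == "abc".bytelen * 2
~~~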
diff --git a/lib/core/collection/hash_collection.nit b/lib/core/collection/hash_collection.nit
index 8ee3491..890ecd9 100644
@@ -24,7 +24,7 @@ end
 private abstract class HashCollection[K]
        type N: HashNode[K]
 
-       var array: nullable NativeArray[nullable N] = null # Used to store items
+       var array: NativeArray[nullable N] is noautoinit # Used to store items
        var capacity: Int = 0 # Size of _array
        var the_length: Int = 0 # Number of items in the map
 
@@ -50,6 +50,7 @@ private abstract class HashCollection[K]
        # Return the node associated with the key
        fun node_at(k: nullable Object): nullable N
        do
+               if _the_length == 0 then return null
                # cache: `is` is used instead of `==` because it is a faster filter (even if not exact)
                if k.is_same_instance(_last_accessed_key) then return _last_accessed_node
 
@@ -62,6 +63,7 @@ private abstract class HashCollection[K]
        # Return the node associated with the key (but with the index already known)
        fun node_at_idx(i: Int, k: nullable Object): nullable N
        do
+               if _the_length == 0 then return null
                var c = _array[i]
                while c != null do
                        var ck = c._key
@@ -111,6 +113,7 @@ private abstract class HashCollection[K]
        # Remove the node associated with the key
        fun remove_node(k: nullable Object)
        do
+               if _the_length == 0 then return
                var i = index_at(k)
                var node = node_at_idx(i, k)
                if node == null then return
@@ -162,7 +165,6 @@ private abstract class HashCollection[K]
        # Force a capacity
        fun enlarge(cap: Int)
        do
-               var old_cap = _capacity
                # get a new capacity
                if cap < _the_length + 1 then cap = _the_length + 1
                if cap <= _capacity then return
@@ -173,15 +175,6 @@ private abstract class HashCollection[K]
                var new_array = new NativeArray[nullable N](cap)
                _array = new_array
 
-               # clean the new array
-               var i = cap - 1
-               while i >=0 do
-                       new_array[i] = null
-                       i -= 1
-               end
-
-               if _capacity <= old_cap then return
-
                # Reput items in the array
                var node = _first_item
                while node != null do
@@ -253,6 +246,7 @@ class HashMap[K, V]
 
        redef fun []=(key, v)
        do
+               if _capacity == 0 then enlarge(17) # 17 because magic in `store`
                var i = index_at(key)
                var c = node_at_idx(i, key)
                if c != null then
@@ -269,7 +263,6 @@ class HashMap[K, V]
        do
                _capacity = 0
                _the_length = 0
-               enlarge(0)
        end
 
        redef var keys: RemovableCollection[K] = new HashMapKeys[K, V](self) is lazy
@@ -442,6 +435,7 @@ class HashSet[E]
 
        redef fun add(item)
        do
+               if _capacity == 0 then enlarge(17) # 17 because magic in `store`
                var i = index_at(item)
                var c = node_at_idx(i, item)
                if c != null then
@@ -461,7 +455,6 @@ class HashSet[E]
        do
                _capacity = 0
                _the_length = 0
-               enlarge(0)
        end
 
        # Build a list filled with the items of `coll`.
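
Taken together, these hunks make hash collections lazy: a fresh `HashMap` or `HashSet` no longer allocates or zero-fills a backing array, the first insertion triggers `enlarge(17)`, and the new `_the_length == 0` guards turn lookups and removals on still-empty collections into cheap no-ops. A hypothetical illustration:

~~~nit
var m = new HashMap[String, Int]
assert not m.has_key("a") # empty: node_at short-circuits, no array access
m["a"] = 1                # first store enlarges the capacity to 17
assert m["a"] == 1
~~~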
diff --git a/lib/hash_debug.nit b/lib/hash_debug.nit
index 75d4160..5caf83d 100644
@@ -55,6 +55,13 @@ redef class Sys
        # Total capacity of hash collections receiver of `HashCollection::store`
        var st_tot_cap = 0
 
+       # Number of calls of `HashCollection::enlarge`
+       var en_count = 0
+       # Total length of hash collections receiver of `HashCollection::enlarge`
+       var en_tot_length = 0
+       # Total capacity of hash collections receiver of `HashCollection::enlarge`
+       var en_tot_cap = 0
+
        private fun div(n,d: Int): String
        do
                if d == 0 then return "NA"
@@ -90,6 +97,11 @@ number of collisions: {{{st_coll}}} ({{{div(st_coll*100,st_count)}}}%)
 average length of collisions: {{{div(st_tot_coll,st_coll)}}}
 average length of considered collections: {{{div(st_tot_length,sys.st_count)}}}
 average capacity or considered collections: {{{div(st_tot_cap,sys.st_count)}}} ({{{div(st_tot_cap*100,st_tot_length)}}}%)
+
+ENLARGE:
+number of enlarge: {{{en_count}}}
+average length of considered collections: {{{div(en_tot_length,sys.en_count)}}}
+average capacity or considered collections: {{{div(en_tot_cap,sys.en_count)}}} ({{{div(en_tot_cap*100,en_tot_length)}}}%)
 ~~~~~~"""
        end
 
@@ -133,6 +145,14 @@ redef class HashCollection[K]
                return super
        end
 
+       redef fun enlarge(c)
+       do
+               super
+               sys.en_count += 1
+               sys.en_tot_length += _the_length
+               sys.en_tot_cap += _capacity
+       end
+
        # Count and update length of collisions for `node_at_idx`
        # Note for dynamic call-graph analysis: callers of this functions are
        # responsible of collisions.
diff --git a/src/doc/doc_phases/doc_html.nit b/src/doc/doc_phases/doc_html.nit
index ebc1193..30a15c2 100644
@@ -70,6 +70,9 @@ redef class ToolContext
        # FIXME redo the plugin
        var opt_github_gitdir = new OptionString("Git working directory used to resolve path name (ex: /home/me/mypackage/)", "--github-gitdir")
 
+       # Do not produce HTML files
+       var opt_no_render = new OptionBool("do not render HTML files", "--no-render")
+
        redef init do
                super
 
@@ -77,7 +80,8 @@ redef class ToolContext
                        opt_source, opt_sharedir, opt_shareurl, opt_custom_title,
                        opt_custom_footer, opt_custom_intro, opt_custom_brand,
                        opt_github_upstream, opt_github_base_sha1, opt_github_gitdir,
-                       opt_piwik_tracker, opt_piwik_site_id)
+                       opt_piwik_tracker, opt_piwik_site_id,
+                       opt_no_render)
        end
 
        redef fun process_options(args) do
@@ -103,6 +107,7 @@ class RenderHTMLPhase
        var name_sorter = new MEntityNameSorter
 
        redef fun apply do
+               if ctx.opt_no_render.value then return
                init_output_dir
                for page in doc.pages.values do
                        page.render(self, doc).write_to_file("{ctx.output_dir.to_s}/{page.html_url}")
diff --git a/src/doc/doc_phases/doc_phases.nit b/src/doc/doc_phases/doc_phases.nit
index fefd1ac..24969e4 100644
@@ -19,3 +19,4 @@ module doc_phases
 
 import doc_html
 import doc_indexing
+import doc_test
diff --git a/src/doc/doc_phases/doc_test.nit b/src/doc/doc_phases/doc_test.nit
new file mode 100644
index 0000000..ddb5eb7
--- /dev/null
+++ b/src/doc/doc_phases/doc_test.nit
@@ -0,0 +1,59 @@
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Print the generated DocModel to stdout.
+#
+# Mainly used for tests.
+module doc_test
+
+import doc_structure
+import counter
+
+redef class ToolContext
+
+       # Print test data (pages and model metrics) to stdout.
+       var opt_test = new OptionBool("print test data", "--test")
+
+       redef init do
+               super
+               option_context.add_option(opt_test)
+       end
+end
+
+# Print the DocModel to stdout.
+class DocTestPhase
+       super DocPhase
+
+       redef fun apply do
+               if not ctx.opt_test.value then return
+               # Pages metrics
+               var page_counter = new Counter[String]
+               var pages = doc.pages.keys.to_a
+               default_comparator.sort(pages)
+               for title in pages do
+                       var page = doc.pages[title]
+                       page_counter.inc page.class_name
+                       print page.pretty_print.write_to_string
+               end
+               print "Generated {doc.pages.length} pages"
+               page_counter.print_elements(100)
+               # Model metrics
+               var model_counter = new Counter[String]
+               for mentity in doc.mentities do
+                       model_counter.inc mentity.class_name
+               end
+               print "Found {doc.mentities.length} mentities"
+               model_counter.print_elements(100)
+       end
+end
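
Combined with the new `--no-render` option above, this phase reproduces the former behavior of `--test`: running nitdoc with `--no-render --test` prints the page and model metrics without writing any HTML, as exercised by the updated `tests/nitdoc.args` below.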
diff --git a/src/loader.nit b/src/loader.nit
index aa88cdc..9b79c62 100644
@@ -152,6 +152,11 @@ redef class ModelBuilder
 
                        var mmodule = identify_module(a)
                        if mmodule == null then
+                               if a.file_exists then
+                                       toolcontext.error(null, "Error: `{a}` is not a Nit source file.")
+                               else
+                                       toolcontext.error(null, "Error: cannot find module `{a}`.")
+                               end
                                continue
                        end
 
diff --git a/src/nitdoc.nit b/src/nitdoc.nit
index bda79e7..2591863 100644
@@ -19,19 +19,12 @@ module nitdoc
 
 import modelbuilder
 import doc
-import counter
 
 redef class ToolContext
        # Nitdoc generation phase.
        var docphase: Phase = new Nitdoc(self, null)
 
-       # File pattern used to link documentation to source code.
-       var opt_test = new OptionBool("do not render anything, only print test data", "--test")
-
-       redef init do
-               super
-               option_context.add_option(opt_test)
-       end
+       init do super # to fix ambiguous linearization
 end
 
 # Nitdoc phase explores the model and generates pages for each mentity found
@@ -52,37 +45,14 @@ private class Nitdoc
                        new IntroRedefListPhase(toolcontext, doc),
                        new LinListPhase(toolcontext, doc),
                        new GraphPhase(toolcontext, doc),
-                       new ReadmePhase(toolcontext, doc): DocPhase]
-
-               if not toolcontext.opt_test.value then
-                       phases.add new RenderHTMLPhase(toolcontext, doc)
-               end
+                       new ReadmePhase(toolcontext, doc),
+                       new RenderHTMLPhase(toolcontext, doc),
+                       new DocTestPhase(toolcontext, doc): DocPhase]
 
                for phase in phases do
                        toolcontext.info("# {phase.class_name}", 1)
                        phase.apply
                end
-
-               if toolcontext.opt_test.value then
-                       # Pages metrics
-                       var page_counter = new Counter[String]
-                       var pages = doc.pages.keys.to_a
-                       default_comparator.sort(pages)
-                       for title in pages do
-                               var page = doc.pages[title]
-                               page_counter.inc page.class_name
-                               print page.pretty_print.write_to_string
-                       end
-                       print "Generated {doc.pages.length} pages"
-                       page_counter.print_elements(100)
-                       # Model metrics
-                       var model_counter = new Counter[String]
-                       for mentity in doc.mentities do
-                               model_counter.inc mentity.class_name
-                       end
-                       print "Found {doc.mentities.length} mentities"
-                       model_counter.print_elements(100)
-               end
        end
 end
 
diff --git a/tests/nitdoc.args b/tests/nitdoc.args
index de4c3e4..ee5fd03 100644
@@ -1,4 +1,4 @@
 module_1.nit -d $WRITE
 base_attr_nullable.nit -d $WRITE
 --private base_attr_nullable.nit -d $WRITE
---test test_prog -d $WRITE
+--no-render --test test_prog -d $WRITE
diff --git a/tests/sav/test_hash_debug.res b/tests/sav/test_hash_debug.res
index b52a9c5..c9b5f48 100644
 
 a1
 false
+~~~No hash statistics~~~
 ~~~Hash statistics~~~
 GET:
 number of get and has_key: 1
 number of collisions: 0 (0.00%)
 average length of collisions: NA
 average length of considered collections: 0.00
-average capacity of considered collections: 1.00 (NA%)
+average capacity of considered collections: 17.00 (NA%)
 
 STORE:
-number of stores: 0
-number of collisions: 0 (NA%)
-average length of collisions: NA
-average length of considered collections: NA
-average capacity or considered collections: NA (NA%)
-~~~~~~
-~~~Hash statistics~~~
-GET:
-number of get and has_key: 2
+number of stores: 1
 number of collisions: 0 (0.00%)
 average length of collisions: NA
 average length of considered collections: 0.00
-average capacity of considered collections: 1.00 (NA%)
+average capacity or considered collections: 17.00 (NA%)
 
-STORE:
-number of stores: 1
-number of collisions: 0 (0.00%)
-average length of collisions: NA
+ENLARGE:
+number of enlarge: 1
 average length of considered collections: 0.00
-average capacity or considered collections: 1.00 (NA%)
+average capacity or considered collections: 17.00 (NA%)
 ~~~~~~
 ~~~Hash statistics~~~
 GET:
-number of get and has_key: 3
+number of get and has_key: 2
 number of collisions: 0 (0.00%)
 average length of collisions: NA
-average length of considered collections: 0.33
-average capacity of considered collections: 6.33 (1900.00%)
+average length of considered collections: 0.50
+average capacity of considered collections: 17.00 (3400.00%)
 
 STORE:
 number of stores: 1
 number of collisions: 0 (0.00%)
 average length of collisions: NA
 average length of considered collections: 0.00
-average capacity or considered collections: 1.00 (NA%)
+average capacity or considered collections: 17.00 (NA%)
+
+ENLARGE:
+number of enlarge: 1
+average length of considered collections: 0.00
+average capacity or considered collections: 17.00 (NA%)
 ~~~~~~
 true
 ~~~Hash statistics~~~
 GET:
-number of get and has_key: 4
+number of get and has_key: 2
 number of collisions: 0 (0.00%)
 average length of collisions: NA
 average length of considered collections: 0.50
-average capacity of considered collections: 9.00 (1800.00%)
+average capacity of considered collections: 17.00 (3400.00%)
 
 STORE:
 number of stores: 1
 number of collisions: 0 (0.00%)
 average length of collisions: NA
 average length of considered collections: 0.00
-average capacity or considered collections: 1.00 (NA%)
+average capacity or considered collections: 17.00 (NA%)
+
+ENLARGE:
+number of enlarge: 1
+average length of considered collections: 0.00
+average capacity or considered collections: 17.00 (NA%)
 ~~~~~~
 
 a2
 false
 ~~~Hash statistics~~~
 GET:
-number of get and has_key: 5
+number of get and has_key: 3
 number of collisions: 0 (0.00%)
 average length of collisions: NA
-average length of considered collections: 0.60
-average capacity of considered collections: 10.60 (1766.67%)
+average length of considered collections: 0.67
+average capacity of considered collections: 17.00 (2550.00%)
 
 STORE:
 number of stores: 1
 number of collisions: 0 (0.00%)
 average length of collisions: NA
 average length of considered collections: 0.00
-average capacity or considered collections: 1.00 (NA%)
+average capacity or considered collections: 17.00 (NA%)
+
+ENLARGE:
+number of enlarge: 1
+average length of considered collections: 0.00
+average capacity or considered collections: 17.00 (NA%)
 ~~~~~~
 ~~~Hash statistics~~~
 GET:
-number of get and has_key: 6
+number of get and has_key: 4
 number of collisions: 0 (0.00%)
 average length of collisions: NA
-average length of considered collections: 0.67
-average capacity of considered collections: 11.67 (1750.00%)
+average length of considered collections: 0.75
+average capacity of considered collections: 17.00 (2266.67%)
 
 STORE:
 number of stores: 2
 number of collisions: 1 (50.00%)
 average length of collisions: 2.00
 average length of considered collections: 0.50
-average capacity or considered collections: 9.00 (1800.00%)
+average capacity or considered collections: 17.00 (3400.00%)
+
+ENLARGE:
+number of enlarge: 1
+average length of considered collections: 0.00
+average capacity or considered collections: 17.00 (NA%)
 ~~~~~~
 ~~~Hash statistics~~~
 GET:
-number of get and has_key: 7
-number of collisions: 1 (14.29%)
+number of get and has_key: 5
+number of collisions: 1 (20.00%)
 average length of collisions: 2.00
-average length of considered collections: 0.86
-average capacity of considered collections: 12.43 (1450.00%)
+average length of considered collections: 1.00
+average capacity of considered collections: 17.00 (1700.00%)
 
 STORE:
 number of stores: 2
 number of collisions: 1 (50.00%)
 average length of collisions: 2.00
 average length of considered collections: 0.50
-average capacity or considered collections: 9.00 (1800.00%)
+average capacity or considered collections: 17.00 (3400.00%)
+
+ENLARGE:
+number of enlarge: 1
+average length of considered collections: 0.00
+average capacity or considered collections: 17.00 (NA%)
 ~~~~~~
 true
 ~~~Hash statistics~~~
 GET:
-number of get and has_key: 7
-number of collisions: 1 (14.29%)
+number of get and has_key: 5
+number of collisions: 1 (20.00%)
 average length of collisions: 2.00
-average length of considered collections: 0.86
-average capacity of considered collections: 12.43 (1450.00%)
+average length of considered collections: 1.00
+average capacity of considered collections: 17.00 (1700.00%)
 
 STORE:
 number of stores: 2
 number of collisions: 1 (50.00%)
 average length of collisions: 2.00
 average length of considered collections: 0.50
-average capacity or considered collections: 9.00 (1800.00%)
+average capacity or considered collections: 17.00 (3400.00%)
+
+ENLARGE:
+number of enlarge: 1
+average length of considered collections: 0.00
+average capacity or considered collections: 17.00 (NA%)
 ~~~~~~
 
 end
 ~~~Hash statistics~~~
 GET:
-number of get and has_key: 7
-number of collisions: 1 (14.29%)
+number of get and has_key: 5
+number of collisions: 1 (20.00%)
 average length of collisions: 2.00
-average length of considered collections: 0.86
-average capacity of considered collections: 12.43 (1450.00%)
+average length of considered collections: 1.00
+average capacity of considered collections: 17.00 (1700.00%)
 
 STORE:
 number of stores: 2
 number of collisions: 1 (50.00%)
 average length of collisions: 2.00
 average length of considered collections: 0.50
-average capacity or considered collections: 9.00 (1800.00%)
+average capacity or considered collections: 17.00 (3400.00%)
+
+ENLARGE:
+number of enlarge: 1
+average length of considered collections: 0.00
+average capacity or considered collections: 17.00 (NA%)
 ~~~~~~
diff --git a/tests/sav/test_test_phase_args2.res b/tests/sav/test_test_phase_args2.res
new file mode 100644
index 0000000..729ac10
--- /dev/null
+++ b/tests/sav/test_test_phase_args2.res
@@ -0,0 +1,3 @@
+Error: cannot find module `fail.nit`.
+Error: `README.md` is not a Nit source file.
+Errors: 2. Warnings: 0.
diff --git a/tests/test_test_phase.args b/tests/test_test_phase.args
index 5f3aa2c..7409ad5 100644
@@ -1 +1,2 @@
 base_simple3.nit
+fail.nit base_simple3.nit README.md