nitgs: add --direct-call-monomorph and --inline-some-methods
author: Jean Privat <jean@pryen.org>
Thu, 27 Mar 2014 20:33:56 +0000 (16:33 -0400)
committer: Jean Privat <jean@pryen.org>
Tue, 1 Apr 2014 15:06:55 +0000 (11:06 -0400)
These code-generation-level global optimizations allow the separate
compiler to:

* first, use a direct call instead of a VFT call when RTA reports
  that a callsite is monomorphic (and that the receiver is a non-null,
  non-parametric type)
* second, inline static calls when the mmethoddef is tagged accordingly
  (we reuse the `can_inline` service of the global compiler)

**DISCLAIMER**
The point of these optimizations is to evaluate some extreme optimizations,
and thus to bound the gain of much less aggressive optimizations.
Real direct-call and inlining optimizations SHOULD be applied on the AST
(or at some intermediate code level) so that they can be combined with other optimizations.

Some numbers for `nitg --separate nitg.nit`:

# base (no option)
total number of invocations: 45833
invocations by VFT send:     34556 (75.39%)
invocations by direct call:  9955 (21.72%)
invocations by inlinning:    1322 (2.88%)
user time 0m10.2s

# with --direct-call-monomorph
total number of invocations: 45833
invocations by VFT send:     9259 (20.20%)
invocations by direct call:  35199 (76.79%)
invocations by inlinning:    1375 (3.00%)
user 0m10.1s

# with --direct-call-monomorph and --inline-some-methods
total number of invocations: 46657
invocations by VFT send:     9701 (20.79%)
invocations by direct call:  26239 (56.23%)
invocations by inlinning:    10717 (22.96%)
user 0m10.0s

Discussion about the numbers:
Something is fishy: while there is a large reduction in VFT or direct calls,
there is no real gain in CPU time. Points to investigate:

* the optimized invocations are not in the hot path
  TODO gather dynamic numbers
* my CPU is very efficient when dealing with direct calls and indirect calls
  TODO test on pratchett
* some issues with the measurements or with the measurer
  TODO go to sleep

Signed-off-by: Jean Privat <jean@pryen.org>

src/separate_compiler.nit

index 74d032b..cd5692f 100644 (file)
@@ -31,7 +31,11 @@ redef class ToolContext
        # --no-shortcut-equate
        var opt_no_shortcut_equate: OptionBool = new OptionBool("Always call == in a polymorphic way", "--no-shortcut-equal")
        # --inline-coloring-numbers
-       var opt_inline_coloring_numbers: OptionBool = new OptionBool("Inline colors and ids", "--inline-coloring-numbers")
+       var opt_inline_coloring_numbers: OptionBool = new OptionBool("Inline colors and ids (semi-global)", "--inline-coloring-numbers")
+       # --inline-some-methods
+       var opt_inline_some_methods: OptionBool = new OptionBool("Allow the separate compiler to inline some methods (semi-global)", "--inline-some-methods")
+       # --direct-call-monomorph
+       var opt_direct_call_monomorph: OptionBool = new OptionBool("Allow the separate compiler to direct call monomorph sites (semi-global)", "--direct-call-monomorph")
        # --use-naive-coloring
        var opt_bm_typing: OptionBool = new OptionBool("Colorize items incrementaly, used to simulate binary matrix typing", "--bm-typing")
        # --use-mod-perfect-hashing
@@ -48,7 +52,7 @@ redef class ToolContext
                self.option_context.add_option(self.opt_no_inline_intern)
                self.option_context.add_option(self.opt_no_union_attribute)
                self.option_context.add_option(self.opt_no_shortcut_equate)
-               self.option_context.add_option(self.opt_inline_coloring_numbers)
+               self.option_context.add_option(self.opt_inline_coloring_numbers, opt_inline_some_methods, opt_direct_call_monomorph)
                self.option_context.add_option(self.opt_bm_typing)
                self.option_context.add_option(self.opt_phmod_typing)
                self.option_context.add_option(self.opt_phand_typing)
@@ -1004,6 +1008,21 @@ class SeparateCompilerVisitor
                end
        end
 
+       redef fun compile_callsite(callsite, args)
+       do
+               var rta = compiler.runtime_type_analysis
+               var recv = args.first.mtype
+               if compiler.modelbuilder.toolcontext.opt_direct_call_monomorph.value and rta != null and recv isa MClassType then
+                       var tgs = rta.live_targets(callsite)
+                       if tgs.length == 1 then
+                               # DIRECT CALL: RTA reports exactly one live target for this callsite, so call it statically instead of falling through to `super`
+                               var mmethod = callsite.mproperty
+                               self.varargize(mmethod.intro, mmethod.intro.msignature.as(not null), args)
+                               return call(tgs.first, recv, args)
+                       end
+               end
+               return super
+       end
        redef fun send(mmethod, arguments)
        do
                self.varargize(mmethod.intro, mmethod.intro.msignature.as(not null), arguments)
@@ -1136,14 +1155,15 @@ class SeparateCompilerVisitor
                        res = self.new_var(ret)
                end
 
-               if mmethoddef.is_intern and not compiler.modelbuilder.toolcontext.opt_no_inline_intern.value then
+               if (mmethoddef.is_intern and not compiler.modelbuilder.toolcontext.opt_no_inline_intern.value) or
+                       (compiler.modelbuilder.toolcontext.opt_inline_some_methods.value and mmethoddef.can_inline(self)) then
                        compiler.modelbuilder.nb_invok_by_inline += 1
                        var frame = new Frame(self, mmethoddef, recvtype, arguments)
                        frame.returnlabel = self.get_name("RET_LABEL")
                        frame.returnvar = res
                        var old_frame = self.frame
                        self.frame = frame
-                       self.add("\{ /* Inline {mmethoddef} ({arguments.join(",")}) */")
+                       self.add("\{ /* Inline {mmethoddef} ({arguments.join(",")}) on {arguments.first.inspect} */")
                        mmethoddef.compile_inside_to_c(self, arguments)
                        self.add("{frame.returnlabel.as(not null)}:(void)0;")
                        self.add("\}")
@@ -1157,7 +1177,7 @@ class SeparateCompilerVisitor
 
                self.require_declaration(mmethoddef.c_name)
                if res == null then
-                       self.add("{mmethoddef.c_name}({arguments.join(", ")});")
+                       self.add("{mmethoddef.c_name}({arguments.join(", ")}); /* Direct call {mmethoddef} on {arguments.first.inspect}*/")
                        return null
                else
                        self.add("{res} = {mmethoddef.c_name}({arguments.join(", ")});")