From f8ca2a2208339aed2eb8e43b04f92249865be2d7 Mon Sep 17 00:00:00 2001 From: Jean Privat Date: Thu, 27 Mar 2014 16:33:56 -0400 Subject: [PATCH] nitgs: add --direct-call-monomorph and --inline-some-methods These code-generation-level global optimizations allow the separate compiler to: * first, use a direct call instead of a VFT call when RTA indicates that a callsite is monomorphic (and that the receiver is a non-null, non-parametric type) * second, inline static calls when the mmethoddef is tagged accordingly (we reuse the `can_inline` service of the global compiler) **DISCLAIMER** The point of these optimizations is to evaluate some extreme optimizations, thus to bound the gain of much less aggressive optimizations. Real direct call and inlining optimizations SHOULD be applied on the AST (or some intermediate code level) to be combinable with other optimizations. Some numbers for `nitg --separate nitg.nit`: # base (no option) total number of invocations: 45833 invocations by VFT send: 34556 (75.39%) invocations by direct call: 9955 (21.72%) invocations by inlinning: 1322 (2.88%) user time 0m10.2s # with --direct-call-monomorph total number of invocations: 45833 invocations by VFT send: 9259 (20.20%) invocations by direct call: 35199 (76.79%) invocations by inlinning: 1375 (3.00%) user 0m10.1s # with --direct-call-monomorph and --inline-some-methods total number of invocations: 46657 invocations by VFT send: 9701 (20.79%) invocations by direct call: 26239 (56.23%) invocations by inlinning: 10717 (22.96%) user 0m10.0s Discussion about the numbers: Something is fishy: while there is a high reduction of VFT or direct calls, there is no real gain in CPU time. 
Points to investigate: * the optimized invocations are not in the hot path TODO gather dynamic numbers * my CPU is very efficient when dealing with direct call and indirect calls TODO test on pratchett * some issues with the measurements or with the measurer TODO go to sleep Signed-off-by: Jean Privat --- src/separate_compiler.nit | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/src/separate_compiler.nit b/src/separate_compiler.nit index 74d032b..cd5692f 100644 --- a/src/separate_compiler.nit +++ b/src/separate_compiler.nit @@ -31,7 +31,11 @@ redef class ToolContext # --no-shortcut-equate var opt_no_shortcut_equate: OptionBool = new OptionBool("Always call == in a polymorphic way", "--no-shortcut-equal") # --inline-coloring-numbers - var opt_inline_coloring_numbers: OptionBool = new OptionBool("Inline colors and ids", "--inline-coloring-numbers") + var opt_inline_coloring_numbers: OptionBool = new OptionBool("Inline colors and ids (semi-global)", "--inline-coloring-numbers") + # --inline-some-methods + var opt_inline_some_methods: OptionBool = new OptionBool("Allow the separate compiler to inline some methods (semi-global)", "--inline-some-methods") + # --direct-call-monomorph + var opt_direct_call_monomorph: OptionBool = new OptionBool("Allow the separate compiler to direct call monomorph sites (semi-global)", "--direct-call-monomorph") # --use-naive-coloring var opt_bm_typing: OptionBool = new OptionBool("Colorize items incrementaly, used to simulate binary matrix typing", "--bm-typing") # --use-mod-perfect-hashing @@ -48,7 +52,7 @@ redef class ToolContext self.option_context.add_option(self.opt_no_inline_intern) self.option_context.add_option(self.opt_no_union_attribute) self.option_context.add_option(self.opt_no_shortcut_equate) - self.option_context.add_option(self.opt_inline_coloring_numbers) + self.option_context.add_option(self.opt_inline_coloring_numbers, opt_inline_some_methods, opt_direct_call_monomorph) 
self.option_context.add_option(self.opt_bm_typing) self.option_context.add_option(self.opt_phmod_typing) self.option_context.add_option(self.opt_phand_typing) @@ -1004,6 +1008,21 @@ class SeparateCompilerVisitor end end + redef fun compile_callsite(callsite, args) + do + var rta = compiler.runtime_type_analysis + var recv = args.first.mtype + if compiler.modelbuilder.toolcontext.opt_direct_call_monomorph.value and rta != null and recv isa MClassType then + var tgs = rta.live_targets(callsite) + if tgs.length == 1 then + # DIRECT CALL + var mmethod = callsite.mproperty + self.varargize(mmethod.intro, mmethod.intro.msignature.as(not null), args) + return call(tgs.first, recv, args) + end + end + return super + end redef fun send(mmethod, arguments) do self.varargize(mmethod.intro, mmethod.intro.msignature.as(not null), arguments) @@ -1136,14 +1155,15 @@ class SeparateCompilerVisitor res = self.new_var(ret) end - if mmethoddef.is_intern and not compiler.modelbuilder.toolcontext.opt_no_inline_intern.value then + if (mmethoddef.is_intern and not compiler.modelbuilder.toolcontext.opt_no_inline_intern.value) or + (compiler.modelbuilder.toolcontext.opt_inline_some_methods.value and mmethoddef.can_inline(self)) then compiler.modelbuilder.nb_invok_by_inline += 1 var frame = new Frame(self, mmethoddef, recvtype, arguments) frame.returnlabel = self.get_name("RET_LABEL") frame.returnvar = res var old_frame = self.frame self.frame = frame - self.add("\{ /* Inline {mmethoddef} ({arguments.join(",")}) */") + self.add("\{ /* Inline {mmethoddef} ({arguments.join(",")}) on {arguments.first.inspect} */") mmethoddef.compile_inside_to_c(self, arguments) self.add("{frame.returnlabel.as(not null)}:(void)0;") self.add("\}") @@ -1157,7 +1177,7 @@ class SeparateCompilerVisitor self.require_declaration(mmethoddef.c_name) if res == null then - self.add("{mmethoddef.c_name}({arguments.join(", ")});") + self.add("{mmethoddef.c_name}({arguments.join(", ")}); /* Direct call {mmethoddef} on 
{arguments.first.inspect}*/") return null else self.add("{res} = {mmethoddef.c_name}({arguments.join(", ")});") -- 1.7.9.5