metrics: introduce Mendel analysis
author Alexandre Terrasa <alexandre@moz-code.org>
Fri, 7 Mar 2014 06:04:03 +0000 (01:04 -0500)
committer Alexandre Terrasa <alexandre@moz-code.org>
Fri, 7 Mar 2014 18:45:07 +0000 (13:45 -0500)
This analysis can be used to extract interesting classes from a class hierarchy

Signed-off-by: Alexandre Terrasa <alexandre@moz-code.org>

src/metrics/mendel_metrics.nit [new file with mode: 0644]
src/metrics/metrics.nit
src/metrics/metrics_base.nit

diff --git a/src/metrics/mendel_metrics.nit b/src/metrics/mendel_metrics.nit
new file mode 100644 (file)
index 0000000..cf1bc3a
--- /dev/null
@@ -0,0 +1,300 @@
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Copyright 2014 Alexandre Terrasa <alexandre@moz-code.org>
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# The Mendel model helps to understand class hierarchies
+#
+# It provides metrics to extract interesting classes:
+#
+# * Large classes that have a lot of local mproperties
+# * Budding classes that provide more mproperties than their superclasses
+# * Blooming classes that are both large and budding
+#
+# Also, this model helps to understand inheritance behaviours between classes.
+# It provides metrics to categorize classes as:
+#
+# * pure overriders that contain only redefinitions
+# * overriders that contain more redefinitions than introductions
+# * pure extenders that contain only introductions
+# * extenders that contain more introduction than redefinitions
+#
+# Finally, this model can characterize overriding behaviors
+#
+# * pure specializers that always call super in their redefinitions
+# * specializers that have more redefinitions that call super than redefinitions that do not
+# * pure replacers that never call super in their redefinitions
+# * replacers that have fewer redefinitions that call super than redefinitions that do not
+#
+# For more details see
+#  Mendel: A Model, Metrics and Rules to Understand Class Hierarchies
+#  S. Denier and Y. Gueheneuc
+#  in Proceedings of the 16th IEEE International Conference on Program Comprehension (ICPC'08)
+module mendel_metrics
+
+import model
+import metrics_base
+import mclasses_metrics
+import phase
+import frontend
+
+redef class ToolContext
+       # The phase that runs the Mendel analysis (see `MendelMetricsPhase`)
+       var mendel_metrics_phase = new MendelMetricsPhase(self, null)
+end
+
+# Phase that computes the Mendel metrics and reports the interesting mclasses
+#
+# Does nothing unless `--mendel` or `--all` is set.
+private class MendelMetricsPhase
+       super Phase
+
+       # Collect the `cnblp`, `cnvi` and `cnvs` metrics on the selected mclasses,
+       # print the mclasses that are above each metric threshold and, when the
+       # csv option is set, save the results as CSV files in the output directory.
+       redef fun process_mainmodule(mainmodule)
+       do
+               if not toolcontext.opt_mendel.value and not toolcontext.opt_all.value then return
+               var csv = toolcontext.opt_csv.value
+               var out = "{toolcontext.opt_dir.value or else "metrics"}/mendel"
+               out.mkdir
+
+               print toolcontext.format_h1("\n# Mendel metrics")
+
+               var vis = protected_visibility
+               var model = toolcontext.modelbuilder.model
+
+               # keep only the mclasses that are at least protected and are not interfaces
+               var mclasses = new HashSet[MClass]
+               for mclass in model.mclasses do
+                       if mclass.visibility < vis then continue
+                       if mclass.is_interface then continue
+                       mclasses.add(mclass)
+               end
+
+               var cnblp = new CNBLP(mainmodule, vis)
+               var cnvi = new CNVI(mainmodule)
+               var cnvs = new CNVS(mainmodule)
+
+               var metrics = new MetricSet
+               metrics.register(cnblp, cnvi, cnvs)
+               metrics.collect(mclasses)
+               if csv then metrics.to_csv.save("{out}/mendel.csv")
+
+               # each category is reported exactly once
+               print toolcontext.format_h4("\tlarge mclasses (threshold: {cnblp.threshold})")
+               for mclass in cnblp.above_threshold do
+                       print toolcontext.format_p("\t   {mclass.name}: {cnblp.values[mclass]}")
+               end
+
+               print toolcontext.format_h4("\tbudding mclasses (threshold: {cnvi.threshold})")
+               for mclass in cnvi.above_threshold do
+                       print toolcontext.format_p("\t   {mclass.name}: {cnvi.values[mclass]}")
+               end
+
+               print toolcontext.format_h4("\tblooming mclasses (threshold: {cnvs.threshold})")
+               for mclass in cnvs.above_threshold do
+                       print toolcontext.format_p("\t   {mclass.name}: {cnvs.values[mclass]}")
+               end
+
+               if csv then
+                       # one line per mclass, one column per inheritance behaviour
+                       # NOTE(review): lines carry no mclass identifier and follow the
+                       # iteration order of `mclasses`.
+                       # NOTE(review): `object_id` on a Bool presumably yields 0 or 1 - confirm.
+                       var csvh = new CSVDocument
+                       csvh.header = ["povr", "ovr", "pext", "ext", "pspe", "spe", "prep", "rep", "eq"]
+                       for mclass in mclasses do
+                               var povr = mclass.is_pure_overrider(vis).object_id
+                               var ovr = mclass.is_overrider(vis).object_id
+                               var pext = mclass.is_pure_extender(vis).object_id
+                               var ext = mclass.is_extender(vis).object_id
+                               var pspe = mclass.is_pure_specializer(vis).object_id
+                               # `spe` is the non-pure specializer flag, distinct from `pspe`
+                               var spe = mclass.is_specializer(vis).object_id
+                               var prep = mclass.is_pure_replacer(vis).object_id
+                               var rep = mclass.is_replacer(vis).object_id
+                               var eq = mclass.is_equal(vis).object_id
+                               csvh.add_line(povr, ovr, pext, ext, pspe, spe, prep, rep, eq)
+                       end
+                       csvh.save("{out}/inheritance_behaviour.csv")
+               end
+       end
+end
+
+# Class Branch Mean Size
+#
+# cbms(class) = |TotS(class)| / (DIT(class) + 1)
+#
+# `TotS` is taken from `all_mproperties` with `protected_visibility` and
+# `DIT` from the depth of the class in the hierarchy of `mainmodule`.
+class CBMS
+       super MClassMetric
+       super FloatMetric
+       redef fun name do return "cbms"
+       redef fun desc do return "branch mean size, mean number of introduction available among ancestors"
+
+       # The module used to resolve the class hierarchy and the mproperties
+       var mainmodule: MModule
+       init(mainmodule: MModule) do self.mainmodule = mainmodule
+
+       # Store in `values` the branch mean size of each mclass of `mclasses`
+       redef fun collect(mclasses) do
+               for mclass in mclasses do
+                       var total_size = mclass.all_mproperties(mainmodule, protected_visibility).length.to_f
+                       var branch_length = (mclass.in_hierarchy(mainmodule).depth + 1).to_f
+                       values[mclass] = total_size / branch_length
+               end
+       end
+end
+
+# Class Novelty Index
+#
+# cnvi(class) = |LocS(class)| / cbms(parents(class))
+#
+# `LocS` is taken from `local_mproperties` with `protected_visibility`;
+# the divisor is the average `CBMS` value of the direct parents.
+class CNVI
+       super MClassMetric
+       super FloatMetric
+       redef fun name do return "cnvi"
+       redef fun desc do return "class novelty index, contribution of the class to its branch in term of introductions"
+
+       # The module used to resolve the class hierarchy
+       var mainmodule: MModule
+       init(mainmodule: MModule) do self.mainmodule = mainmodule
+
+       # Store in `values` the novelty index of each mclass of `mclasses`
+       #
+       # A mclass without direct parent gets `0.0`.
+       redef fun collect(mclasses) do
+               var branch_size = new CBMS(mainmodule)
+               for mclass in mclasses do
+                       var parents = mclass.in_hierarchy(mainmodule).direct_greaters
+                       if parents.length == 0 then
+                               values[mclass] = 0.0
+                               continue
+                       end
+                       # branch mean size, computed on the direct parents only
+                       branch_size.clear
+                       branch_size.collect(new HashSet[MClass].from(parents))
+                       # class novelty index
+                       var local_count = mclass.local_mproperties(protected_visibility).length
+                       values[mclass] = local_count.to_f / branch_size.avg
+               end
+       end
+end
+
+# Class Novelty Score
+#
+# cnvs(class) = |LocS(class)| x cnvi(class)
+class CNVS
+       super MClassMetric
+       super FloatMetric
+       redef fun name do return "cnvs"
+       redef fun desc do return "class novelty score, importance of the contribution of the class to its branch"
+
+       # The module used to compute the underlying `CNVI` metric
+       var mainmodule: MModule
+       init(mainmodule: MModule) do self.mainmodule = mainmodule
+
+       # Store in `values` the novelty score of each mclass of `mclasses`
+       #
+       # A fresh `CNVI` is collected on the same `mclasses` first.
+       redef fun collect(mclasses) do
+               var novelty_index = new CNVI(mainmodule)
+               novelty_index.collect(mclasses)
+               for mclass in mclasses do
+                       var local_count = mclass.local_mproperties(protected_visibility).length.to_f
+                       values[mclass] = novelty_index.values[mclass] * local_count
+               end
+       end
+end
+
+redef class MClass
+       # The set of redefined mproperties whose redefinition calls super
+       #
+       # Only mproperties introduced in another mclass and with a visibility
+       # of at least `min_visibility` are kept.
+       fun extended_mproperties(min_visibility: MVisibility): Set[MProperty] do
+               var result = new HashSet[MProperty]
+               for mclassdef in mclassdefs do
+                       for mpropdef in mclassdef.mpropdefs do
+                               var mproperty = mpropdef.mproperty
+                               if mproperty.visibility < min_visibility then continue
+                               if not mpropdef.has_supercall then continue
+                               # skip the mproperties introduced by this mclass
+                               if mproperty.intro_mclassdef.mclass == self then continue
+                               result.add(mproperty)
+                       end
+               end
+               return result
+       end
+
+       # The set of redefined mproperties whose redefinition does not call super
+       #
+       # Only mproperties introduced in another mclass and with a visibility
+       # of at least `min_visibility` are kept.
+       fun overriden_mproperties(min_visibility: MVisibility): Set[MProperty] do
+               var result = new HashSet[MProperty]
+               for mclassdef in mclassdefs do
+                       for mpropdef in mclassdef.mpropdefs do
+                               var mproperty = mpropdef.mproperty
+                               if mproperty.visibility < min_visibility then continue
+                               if mpropdef.has_supercall then continue
+                               # skip the mproperties introduced by this mclass
+                               if mproperty.intro_mclassdef.mclass == self then continue
+                               result.add(mproperty)
+                       end
+               end
+               return result
+       end
+
+       # Pure overriders contain only redefinitions:
+       # no introduction and at least one local mproperty.
+       private fun is_pure_overrider(min_visibility: MVisibility): Bool do
+               var intro_count = intro_mproperties(min_visibility).length
+               var local_count = local_mproperties(min_visibility).length
+               return intro_count == 0 and local_count > 0
+       end
+
+       # Overriders contain at least as many redefinitions as introductions
+       # and at least one local mproperty.
+       private fun is_overrider(min_visibility: MVisibility): Bool do
+               var redef_count = redef_mproperties(min_visibility).length
+               var intro_count = intro_mproperties(min_visibility).length
+               var local_count = local_mproperties(min_visibility).length
+               return redef_count >= intro_count and local_count > 0
+       end
+
+       # Pure extenders contain only introductions:
+       # no redefinition and at least one local mproperty.
+       private fun is_pure_extender(min_visibility: MVisibility): Bool do
+               var redef_count = redef_mproperties(min_visibility).length
+               var local_count = local_mproperties(min_visibility).length
+               return redef_count == 0 and local_count > 0
+       end
+
+       # Extenders contain more introductions than redefinitions
+       # and at least one local mproperty.
+       private fun is_extender(min_visibility: MVisibility): Bool do
+               var redef_count = redef_mproperties(min_visibility).length
+               var intro_count = intro_mproperties(min_visibility).length
+               var local_count = local_mproperties(min_visibility).length
+               return intro_count > redef_count and local_count > 0
+       end
+
+       # Pure specializers have at least one redefinition
+       # and no redefinition that omits the call to super.
+       private fun is_pure_specializer(min_visibility: MVisibility): Bool do
+               var replaced_count = overriden_mproperties(min_visibility).length
+               var redef_count = redef_mproperties(min_visibility).length
+               return replaced_count == 0 and redef_count > 0
+       end
+
+       # Specializers have more redefinitions that call super than
+       # redefinitions that do not, and at least one redefinition.
+       private fun is_specializer(min_visibility: MVisibility): Bool do
+               var extended_count = extended_mproperties(min_visibility).length
+               var replaced_count = overriden_mproperties(min_visibility).length
+               var redef_count = redef_mproperties(min_visibility).length
+               return extended_count > replaced_count and redef_count > 0
+       end
+
+       # Pure replacers have at least one redefinition
+       # and no redefinition that calls super.
+       private fun is_pure_replacer(min_visibility: MVisibility): Bool do
+               var extended_count = extended_mproperties(min_visibility).length
+               var redef_count = redef_mproperties(min_visibility).length
+               return extended_count == 0 and redef_count > 0
+       end
+
+       # Replacers have fewer redefinitions that call super than
+       # redefinitions that do not, and at least one redefinition.
+       private fun is_replacer(min_visibility: MVisibility): Bool do
+               var extended_count = extended_mproperties(min_visibility).length
+               var replaced_count = overriden_mproperties(min_visibility).length
+               var redef_count = redef_mproperties(min_visibility).length
+               return replaced_count > extended_count and redef_count > 0
+       end
+
+       # Equals have as many redefinitions that call super as
+       # redefinitions that do not, and at least one redefinition.
+       private fun is_equal(min_visibility: MVisibility): Bool do
+               var extended_count = extended_mproperties(min_visibility).length
+               var replaced_count = overriden_mproperties(min_visibility).length
+               var redef_count = redef_mproperties(min_visibility).length
+               return extended_count == replaced_count and redef_count > 0
+       end
+end
+
index 3d693af..4025583 100644 (file)
@@ -20,6 +20,7 @@ module metrics
 import metrics_base
 import mmodules_metrics
 import mclasses_metrics
+import mendel_metrics
 import inheritance_metrics
 import refinement_metrics
 import static_types_metrics
index 9824179..056aba4 100644 (file)
@@ -32,7 +32,8 @@ redef class ToolContext
        var opt_mmodules = new OptionBool("Compute metrics about mmodules", "--mmodules")
        # --mclassses
        var opt_mclasses = new OptionBool("Compute metrics about mclasses", "--mclasses")
-
+       # --mendel
+       var opt_mendel = new OptionBool("Compute mendel metrics", "--mendel")
        # --inheritance
        var opt_inheritance = new OptionBool("Compute metrics about inheritance usage", "--inheritance")
        # --genericity
@@ -69,6 +70,7 @@ redef class ToolContext
                self.option_context.add_option(opt_all)
                self.option_context.add_option(opt_mmodules)
                self.option_context.add_option(opt_mclasses)
+               self.option_context.add_option(opt_mendel)
                self.option_context.add_option(opt_inheritance)
                self.option_context.add_option(opt_refinement)
                self.option_context.add_option(opt_self)
@@ -193,6 +195,12 @@ interface Metric
 
        # The element with the lowest value
        fun min: ELM is abstract
+
+       # The value above which elements are considered as 'interesting'
+       #
+       # By default, the average plus the standard deviation (`avg + std_dev`).
+       fun threshold: Float do return avg + std_dev
+
+       # The set of elements whose value is above `threshold`
+       fun above_threshold: Set[ELM] is abstract
 end
 
 # A Metric that collects integer data
@@ -225,6 +233,15 @@ class IntMetric
        redef fun avg: Float do return values_cache.avg
 
        redef fun std_dev: Float do return values_cache.std_dev
+
+       # Elements whose value, converted to Float, is strictly greater than `threshold`
+       redef fun above_threshold do
+               # read the threshold once, before the loop
+               var limit = threshold
+               var selected = new HashSet[ELM]
+               for element, value in values do
+                       if value.to_f > limit then
+                               selected.add(element)
+                       end
+               end
+               return selected
+       end
 end
 
 # A Metric that collects float datas
@@ -284,6 +301,15 @@ class FloatMetric
                end
                return (sum / values.length.to_f).sqrt
        end
+
+       # Elements whose value is strictly greater than `threshold`
+       redef fun above_threshold do
+               # read the threshold once, before the loop
+               var limit = threshold
+               var selected = new HashSet[ELM]
+               for element, value in values do
+                       if value > limit then
+                               selected.add(element)
+                       end
+               end
+               return selected
+       end
 end
 
 # A MetricSet is a metric holder