lib: introduce `fca` a module for formal concept analysis
authorAlexandre Terrasa <alexandre@moz-code.org>
Thu, 28 Sep 2017 23:31:48 +0000 (19:31 -0400)
committerAlexandre Terrasa <alexandre@moz-code.org>
Thu, 28 Sep 2017 23:31:48 +0000 (19:31 -0400)
Signed-off-by: Alexandre Terrasa <alexandre@moz-code.org>

lib/fca/fca.nit [new file with mode: 0644]

diff --git a/lib/fca/fca.nit b/lib/fca/fca.nit
new file mode 100644 (file)
index 0000000..b3b487a
--- /dev/null
@@ -0,0 +1,253 @@
+# This file is part of NIT (http://www.nitlanguage.org).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Formal Concept Analysis
+#
+# Formal concept analysis (FCA) is a principled way of deriving a concept hierarchy
+# or formal ontology from a collection of objects and their properties.
+# This means deriving implicit relationships between objects regarding their
+# attributes.
+#
+# Each concept in the hierarchy represents the set of objects sharing the same
+# values for a certain set of properties; and each sub-concept in the hierarchy
+# contains a subset of the objects in the concepts above it.
+#
+# Formal concept analysis finds practical application in fields including data
+# mining, text mining, machine learning or semantic web.
+#
+# ## Building a FormalContext
+#
+# We use the example from https://en.wikipedia.org/wiki/Formal_concept_analysis:
+#
+# ~~~
+# var fc = new FormalContext[Int, String]
+# fc.set_object_attributes(1, ["odd", "square"])
+# fc.set_object_attributes(2, ["even", "prime"])
+# fc.set_object_attributes(3, ["odd", "prime"])
+# fc.set_object_attributes(4, ["even", "composite", "square"])
+# fc.set_object_attributes(5, ["odd", "prime"])
+# fc.set_object_attributes(6, ["even", "composite"])
+# fc.set_object_attributes(7, ["odd", "prime"])
+# fc.set_object_attributes(8, ["even", "composite"])
+# fc.set_object_attributes(9, ["odd", "square", "composite"])
+# fc.set_object_attributes(10, ["even", "composite"])
+# ~~~
+#
+# ## Computing the set of FormalConcept
+#
+# ~~~
+# var concepts = fc.formal_concepts
+# for concept in concepts do
+#      print concept
+# end
+# ~~~
+#
+# ## Visualizing formal concept with ConceptLattice
+#
+# ~~~nitish
+# var cl = new ConceptLattice[Int, String].from_concepts(concepts)
+# cl.show_dot
+# ~~~
+module fca
+
+import poset
+
+# Formal Context
+#
+# A formal context is a triple *K = (G, M, I)*, where *G* is a set of `objects`,
+# *M* is a set of `attributes`, and *I ⊆ G × M* is a binary relation called incidence
+# that expresses which objects have which attributes (`objects_attributes`).
+#
+# Predicate *gIm* designates object *g*'s having attribute *m*.
+# For a subset *A ⊆ G* of objects and a subset *B ⊆ M* of attributes, one defines
+# two derivation operators as follows:
+#
+# * *A' = {m ∈ M | ∀ g ∈ A, gIm}*, and dually
+# * *B' = {g ∈ G | ∀ m ∈ B, gIm}*.
+class FormalContext[O: Object, A: Object]
+
+       # Objects in the context
+       var objects = new HashSet[O]
+
+       # Attributes considered to build concepts
+       var attributes = new HashSet[A]
+
+       # Association between objects and attributes
+       var objects_attributes = new HashMap[O, Set[A]]
+
+       # Associate a set of `attributes` to `object`
+       fun set_object_attributes(object: O, attributes: Collection[A]) do
+               for attribute in attributes do
+                       set_object_attribute(object, attribute)
+               end
+       end
+
+       # Associate an `attribute` to `object`
+       fun set_object_attribute(object: O, attribute: A) do
+               attributes.add attribute
+               objects.add object
+               if not objects_attributes.has_key(object) then
+                       objects_attributes[object] = new HashSet[A]
+               end
+               objects_attributes[object].add attribute
+       end
+
+       # Derive the set of formal concepts from the objects and attributes
+       fun formal_concepts: Set[FormalConcept[O, A]] do
+               # black magic!
+
+               var concepts = new HashSet[FormalConcept[O, A]]
+
+               var extentsByAttr = new HashMap[Set[A], Set[O]]
+               for attribute in attributes do
+                       var ka = new HashSet[A].from([attribute])
+                       extentsByAttr[ka] = new HashSet[O]
+                       for object in objects do
+                               if not objects_attributes[object].has(attribute) then continue
+                               extentsByAttr[ka].add(object)
+                       end
+               end
+
+               var nextents = new HashMap[Set[A], Set[O]]
+               for k1, v1 in extentsByAttr do
+                       for k2, v2 in extentsByAttr do
+                               if k1 == k2 then continue
+                               var n = v1.intersection(v2)
+                               if extentsByAttr.values.has(n) then continue
+                               var ka = k1.union(k2)
+                               nextents[ka] = n
+                       end
+               end
+               extentsByAttr.add_all nextents
+
+               var contained = true
+               for k1, v1 in extentsByAttr do
+                       if not contained then break
+                       for k2, v2 in extentsByAttr do
+                               if k1 == k2 then continue
+                               var n = v1.intersection(v2)
+                               if extentsByAttr.values.has(n) then continue
+                               contained = false
+                               break
+                       end
+               end
+
+               if contained then
+                       extentsByAttr[new HashSet[A]] = new HashSet[O].from(objects)
+               end
+
+               var extents = new HashSet[Set[O]]
+               for objects in extentsByAttr.values do
+                       extents.add objects
+               end
+
+               for extent in extents do
+                       var intents: Set[A] = new HashSet[A]
+                       var count = 0
+                       var cl = new FormalConcept[O, A]
+                       if extent.is_empty then
+                               intents.add_all(attributes)
+                       else
+                               for object in objects do
+                                       if not extent.has(object) then continue
+                                       var prev = objects_attributes[object]
+                                       if count > 0 then
+                                               intents = prev.intersection(intents)
+                                       else
+                                               intents = prev
+                                       end
+                                       count += 1
+                                       cl.objects.add(object)
+                               end
+                       end
+                       cl.attributes.add_all intents
+                       concepts.add cl
+               end
+
+               return concepts
+       end
+end
+
+# Formal Concept
+#
+# A pair *(A,B)* is a formal concept of a FormalContext *(G, M, I)* provided that:
+#
+# * *A ⊆ G*,
+# * *B ⊆ M*,
+# * *A′ = B*, and
+# * *B′ = A*.
+#
+# Equivalently and more intuitively, *(A,B)* is a formal concept precisely when:
+#
+# * every object in *A* has every attribute in *B*,
+# * for every object in *G* that is not in *A*, there is some attribute in *B* that
+#   the object does not have,
+# * for every attribute in *M* that is not in *B*, there is some object in *A*
+#   that does not have that attribute.
+class FormalConcept[O: Object, A: Object]
+
+       # Concept attributes
+       var attributes = new HashSet[A]
+
+       # Concept objects
+       var objects = new HashSet[O]
+
+       # Is `self` a subconcept of `super_concept`?
+       #
+       # A concept C1 is a subconcept of C2 if C2 has all the objects of C1.
+       fun is_subconcept(super_concept: FormalConcept[O, A]): Bool do
+               if self == super_concept then return false
+               if objects.length > super_concept.objects.length then return false
+               return super_concept.objects.has_all(objects)
+       end
+
+       redef fun to_s do return "{attributes}\n{objects}"
+end
+
+# Concept Lattice
+#
+# Formal concepts are partially ordered with regard to inclusion of their extents
+# or (which is equivalent) inverse inclusion of their intent.
+class ConceptLattice[O: Object, A: Object]
+       super POSet[FormalConcept[O, A]]
+
+       # Build `self` from a set of formal `concepts`.
+       init from_concepts(concepts: Set[FormalConcept[O, A]]) do
+               for c in concepts do
+                       add_node c
+               end
+               for c1 in concepts do
+                       for c2 in concepts do
+                               if c1 == c2 then continue
+                               if not is_lower_neighbour(c1, c2, concepts) then continue
+                               add_edge(c2, c1)
+                       end
+               end
+       end
+
+       # Is `sub` the greatest lower bound of `sup` considering all `concepts`?
+       fun is_lower_neighbour(sub, sup: FormalConcept[O, A], concepts: Set[FormalConcept[O, A]]): Bool
+       do
+               if sub == sup then return false
+               if not sub.is_subconcept(sup) then return false
+               for concept in concepts do
+                       if sub == concept then continue
+                       if sup == concept then continue
+                       if not sub.is_subconcept(concept) then continue
+                       if not concept.is_subconcept(sup) then continue
+                       return false
+               end
+               return true
+       end
+end