From 6f7ac3e1fb3ca09a5923665fbd989625cb3771ff Mon Sep 17 00:00:00 2001 From: Alexandre Terrasa Date: Thu, 28 Sep 2017 19:31:48 -0400 Subject: [PATCH] lib: introduce `fca` a module for formal concept analysis Signed-off-by: Alexandre Terrasa --- lib/fca/fca.nit | 253 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 253 insertions(+) create mode 100644 lib/fca/fca.nit diff --git a/lib/fca/fca.nit b/lib/fca/fca.nit new file mode 100644 index 0000000..b3b487a --- /dev/null +++ b/lib/fca/fca.nit @@ -0,0 +1,253 @@ +# This file is part of NIT (http://www.nitlanguage.org). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Formal Concept Analysis +# +# Formal concept analysis (FCA) is a principled way of deriving a concept hierarchy +# or formal ontology from a collection of objects and their properties. +# This means deriving implicit relationships between objects regarding their +# attributes. +# +# Each concept in the hierarchy represents the set of objects sharing the same +# values for a certain set of properties; and each sub-concept in the hierarchy +# contains a subset of the objects in the concepts above it. +# +# Formal concept analysis finds practical application in fields including data +# mining, text mining, machine learning or semantic web. +# +# ## Building a FormalContext +# +# We use the example from https://en.wikipedia.org/wiki/Formal_concept_analysis: +# +# ~~~ +# var fc = new FormalContext[Int, String] +# fc.set_object_attributes(1, ["odd", "square"]) +# fc.set_object_attributes(2, ["even", "prime"]) +# fc.set_object_attributes(3, ["odd", "prime"]) +# fc.set_object_attributes(4, ["even", "composite", "square"]) +# fc.set_object_attributes(5, ["odd", "prime"]) +# fc.set_object_attributes(6, ["even", "composite"]) +# fc.set_object_attributes(7, ["odd", "prime"]) +# fc.set_object_attributes(8, ["even", "composite"]) +# fc.set_object_attributes(9, ["odd", "square", "composite"]) +# fc.set_object_attributes(10, ["even", "composite"]) +# ~~~ +# +# ## Computing the set of FormalConcept +# +# ~~~ +# var concepts = fc.formal_concepts +# for concept in concepts do +# print concept +# end +# ~~~ +# +# ## Visualizing formal concept with ConceptLattice +# +# ~~~nitish +# var cl = new ConceptLattice[Int, String].from_concepts(concepts) +# cl.show_dot +# ~~~ +module fca + +import poset + +# Formal Context +# +# A formal context is a triple *K = (G, M, I)*, where *G* is a set of `objects`, +# *M* is a set of `attributes`, and *I ⊆ G × M* is a binary relation called incidence +# that expresses which objects have which attributes (`objects_attributes`). +# +# Predicate *gIm* designates object *g*'s having attribute *m*. +# For a subset *A ⊆ G* of objects and a subset *B ⊆ M* of attributes, one defines +# two derivation operators as follows: +# +# * *A' = {m ∈ M | ∀ g ∈ A, gIm}*, and dually +# * *B' = {g ∈ G | ∀ m ∈ B, gIm}*. +class FormalContext[O: Object, A: Object] + + # Objects in the context + var objects = new HashSet[O] + + # Attributes considered to build concepts + var attributes = new HashSet[A] + + # Association between objects and attributes + var objects_attributes = new HashMap[O, Set[A]] + + # Associate a set of `attributes` to `object` + fun set_object_attributes(object: O, attributes: Collection[A]) do + for attribute in attributes do + set_object_attribute(object, attribute) + end + end + + # Associate an `attribute` to `object` + fun set_object_attribute(object: O, attribute: A) do + attributes.add attribute + objects.add object + if not objects_attributes.has_key(object) then + objects_attributes[object] = new HashSet[A] + end + objects_attributes[object].add attribute + end + + # Derive the set of formal concepts from the objects and attributes + fun formal_concepts: Set[FormalConcept[O, A]] do + # black magic! + + var concepts = new HashSet[FormalConcept[O, A]] + + var extentsByAttr = new HashMap[Set[A], Set[O]] + for attribute in attributes do + var ka = new HashSet[A].from([attribute]) + extentsByAttr[ka] = new HashSet[O] + for object in objects do + if not objects_attributes[object].has(attribute) then continue + extentsByAttr[ka].add(object) + end + end + + var nextents = new HashMap[Set[A], Set[O]] + for k1, v1 in extentsByAttr do + for k2, v2 in extentsByAttr do + if k1 == k2 then continue + var n = v1.intersection(v2) + if extentsByAttr.values.has(n) then continue + var ka = k1.union(k2) + nextents[ka] = n + end + end + extentsByAttr.add_all nextents + + var contained = true + for k1, v1 in extentsByAttr do + if not contained then break + for k2, v2 in extentsByAttr do + if k1 == k2 then continue + var n = v1.intersection(v2) + if extentsByAttr.values.has(n) then continue + contained = false + break + end + end + + if contained then + extentsByAttr[new HashSet[A]] = new HashSet[O].from(objects) + end + + var extents = new HashSet[Set[O]] + for objects in extentsByAttr.values do + extents.add objects + end + + for extent in extents do + var intents: Set[A] = new HashSet[A] + var count = 0 + var cl = new FormalConcept[O, A] + if extent.is_empty then + intents.add_all(attributes) + else + for object in objects do + if not extent.has(object) then continue + var prev = objects_attributes[object] + if count > 0 then + intents = prev.intersection(intents) + else + intents = prev + end + count += 1 + cl.objects.add(object) + end + end + cl.attributes.add_all intents + concepts.add cl + end + + return concepts + end +end + +# Formal Concept +# +# A pair *(A,B)* is a formal concept of a FormalContext *(G, M, I)* provided that: +# +# * *A ⊆ G*, +# * *B ⊆ M*, +# * *A′ = B*, and +# * *B′ = A*. +# +# Equivalently and more intuitively, *(A,B)* is a formal concept precisely when: +# +# * every object in *A* has every attribute in *B*, +# * for every object in *G* that is not in *A*, there is some attribute in *B* that +# the object does not have, +# * for every attribute in *M* that is not in *B*, there is some object in *A* +# that does not have that attribute. +class FormalConcept[O: Object, A: Object] + + # Concept attributes + var attributes = new HashSet[A] + + # Concept objects + var objects = new HashSet[O] + + # Is `self` a subconcept of `super_concept`? + # + # A concept C1 is a subconcept of C2 if C2 has all the objects of C1. + fun is_subconcept(super_concept: FormalConcept[O, A]): Bool do + if self == super_concept then return false + if objects.length > super_concept.objects.length then return false + return super_concept.objects.has_all(objects) + end + + redef fun to_s do return "{attributes}\n{objects}" +end + +# Concept Lattice +# +# Formal concepts are partially ordered with regard to inclusion of their extents +# or (which is equivalent) inverse inclusion of their intent. +class ConceptLattice[O: Object, A: Object] + super POSet[FormalConcept[O, A]] + + # Build `self` from a set of formal `concepts`. + init from_concepts(concepts: Set[FormalConcept[O, A]]) do + for c in concepts do + add_node c + end + for c1 in concepts do + for c2 in concepts do + if c1 == c2 then continue + if not is_lower_neighbour(c1, c2, concepts) then continue + add_edge(c2, c1) + end + end + end + + # Is `sub` the greatest lower bound of `sup` considering all `concepts`? + fun is_lower_neighbour(sub, sup: FormalConcept[O, A], concepts: Set[FormalConcept[O, A]]): Bool + do + if sub == sup then return false + if not sub.is_subconcept(sup) then return false + for concept in concepts do + if sub == concept then continue + if sup == concept then continue + if not sub.is_subconcept(concept) then continue + if not concept.is_subconcept(sup) then continue + return false + end + return true + end +end -- 1.7.9.5