lib: introduce `fca` a module for formal concept analysis
[nit.git] / lib / fca / fca.nit
1 # This file is part of NIT (http://www.nitlanguage.org).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Formal Concept Analysis
16 #
17 # Formal concept analysis (FCA) is a principled way of deriving a concept hierarchy
18 # or formal ontology from a collection of objects and their properties.
19 # This means deriving implicit relationships between objects regarding their
20 # attributes.
21 #
22 # Each concept in the hierarchy represents the set of objects sharing the same
23 # values for a certain set of properties; and each sub-concept in the hierarchy
24 # contains a subset of the objects in the concepts above it.
25 #
26 # Formal concept analysis finds practical application in fields including data
27 # mining, text mining, machine learning or semantic web.
28 #
29 # ## Building a FormalContext
30 #
31 # We use the example from https://en.wikipedia.org/wiki/Formal_concept_analysis:
32 #
33 # ~~~
34 # var fc = new FormalContext[Int, String]
35 # fc.set_object_attributes(1, ["odd", "square"])
36 # fc.set_object_attributes(2, ["even", "prime"])
37 # fc.set_object_attributes(3, ["odd", "prime"])
38 # fc.set_object_attributes(4, ["even", "composite", "square"])
39 # fc.set_object_attributes(5, ["odd", "prime"])
40 # fc.set_object_attributes(6, ["even", "composite"])
41 # fc.set_object_attributes(7, ["odd", "prime"])
42 # fc.set_object_attributes(8, ["even", "composite"])
43 # fc.set_object_attributes(9, ["odd", "square", "composite"])
44 # fc.set_object_attributes(10, ["even", "composite"])
45 # ~~~
46 #
47 # ## Computing the set of FormalConcept
48 #
49 # ~~~
50 # var concepts = fc.formal_concepts
51 # for concept in concepts do
52 # print concept
53 # end
54 # ~~~
55 #
56 # ## Visualizing formal concept with ConceptLattice
57 #
58 # ~~~nitish
59 # var cl = new ConceptLattice[Int, String].from_concepts(concepts)
60 # cl.show_dot
61 # ~~~
62 module fca
63
64 import poset
65
66 # Formal Context
67 #
68 # A formal context is a triple *K = (G, M, I)*, where *G* is a set of `objects`,
69 # *M* is a set of `attributes`, and *I ⊆ G × M* is a binary relation called incidence
70 # that expresses which objects have which attributes (`objects_attributes`).
71 #
72 # Predicate *gIm* designates object *g*'s having attribute *m*.
73 # For a subset *A ⊆ G* of objects and a subset *B ⊆ M* of attributes, one defines
74 # two derivation operators as follows:
75 #
76 # * *A' = {m ∈ M | ∀ g ∈ A, gIm}*, and dually
77 # * *B' = {g ∈ G | ∀ m ∈ B, gIm}*.
78 class FormalContext[O: Object, A: Object]
79
80 # Objects in the context
81 var objects = new HashSet[O]
82
83 # Attributes considered to build concepts
84 var attributes = new HashSet[A]
85
86 # Association between objects and attributes
87 var objects_attributes = new HashMap[O, Set[A]]
88
89 # Associate a set of `attributes` to `object`
90 fun set_object_attributes(object: O, attributes: Collection[A]) do
91 for attribute in attributes do
92 set_object_attribute(object, attribute)
93 end
94 end
95
96 # Associate an `attribute` to `object`
97 fun set_object_attribute(object: O, attribute: A) do
98 attributes.add attribute
99 objects.add object
100 if not objects_attributes.has_key(object) then
101 objects_attributes[object] = new HashSet[A]
102 end
103 objects_attributes[object].add attribute
104 end
105
106 # Derive the set of formal concepts from the objects and attributes
107 fun formal_concepts: Set[FormalConcept[O, A]] do
108 # black magic!
109
110 var concepts = new HashSet[FormalConcept[O, A]]
111
112 var extentsByAttr = new HashMap[Set[A], Set[O]]
113 for attribute in attributes do
114 var ka = new HashSet[A].from([attribute])
115 extentsByAttr[ka] = new HashSet[O]
116 for object in objects do
117 if not objects_attributes[object].has(attribute) then continue
118 extentsByAttr[ka].add(object)
119 end
120 end
121
122 var nextents = new HashMap[Set[A], Set[O]]
123 for k1, v1 in extentsByAttr do
124 for k2, v2 in extentsByAttr do
125 if k1 == k2 then continue
126 var n = v1.intersection(v2)
127 if extentsByAttr.values.has(n) then continue
128 var ka = k1.union(k2)
129 nextents[ka] = n
130 end
131 end
132 extentsByAttr.add_all nextents
133
134 var contained = true
135 for k1, v1 in extentsByAttr do
136 if not contained then break
137 for k2, v2 in extentsByAttr do
138 if k1 == k2 then continue
139 var n = v1.intersection(v2)
140 if extentsByAttr.values.has(n) then continue
141 contained = false
142 break
143 end
144 end
145
146 if contained then
147 extentsByAttr[new HashSet[A]] = new HashSet[O].from(objects)
148 end
149
150 var extents = new HashSet[Set[O]]
151 for objects in extentsByAttr.values do
152 extents.add objects
153 end
154
155 for extent in extents do
156 var intents: Set[A] = new HashSet[A]
157 var count = 0
158 var cl = new FormalConcept[O, A]
159 if extent.is_empty then
160 intents.add_all(attributes)
161 else
162 for object in objects do
163 if not extent.has(object) then continue
164 var prev = objects_attributes[object]
165 if count > 0 then
166 intents = prev.intersection(intents)
167 else
168 intents = prev
169 end
170 count += 1
171 cl.objects.add(object)
172 end
173 end
174 cl.attributes.add_all intents
175 concepts.add cl
176 end
177
178 return concepts
179 end
180 end
181
182 # Formal Concept
183 #
184 # A pair *(A,B)* is a formal concept of a FormalContext *(G, M, I)* provided that:
185 #
186 # * *A ⊆ G*,
187 # * *B ⊆ M*,
188 # * *A′ = B*, and
189 # * *B′ = A*.
190 #
191 # Equivalently and more intuitively, *(A,B)* is a formal concept precisely when:
192 #
193 # * every object in *A* has every attribute in *B*,
194 # * for every object in *G* that is not in *A*, there is some attribute in *B* that
195 # the object does not have,
196 # * for every attribute in *M* that is not in *B*, there is some object in *A*
197 # that does not have that attribute.
198 class FormalConcept[O: Object, A: Object]
199
200 # Concept attributes
201 var attributes = new HashSet[A]
202
203 # Concept objects
204 var objects = new HashSet[O]
205
206 # Is `self` a subconcept of `super_concept`?
207 #
208 # A concept C1 is a subconcept of C2 if C2 has all the objects of C1.
209 fun is_subconcept(super_concept: FormalConcept[O, A]): Bool do
210 if self == super_concept then return false
211 if objects.length > super_concept.objects.length then return false
212 return super_concept.objects.has_all(objects)
213 end
214
215 redef fun to_s do return "{attributes}\n{objects}"
216 end
217
218 # Concept Lattice
219 #
220 # Formal concepts are partially ordered with regard to inclusion of their extents
221 # or (which is equivalent) inverse inclusion of their intent.
222 class ConceptLattice[O: Object, A: Object]
223 super POSet[FormalConcept[O, A]]
224
225 # Build `self` from a set of formal `concepts`.
226 init from_concepts(concepts: Set[FormalConcept[O, A]]) do
227 for c in concepts do
228 add_node c
229 end
230 for c1 in concepts do
231 for c2 in concepts do
232 if c1 == c2 then continue
233 if not is_lower_neighbour(c1, c2, concepts) then continue
234 add_edge(c2, c1)
235 end
236 end
237 end
238
239 # Is `sub` the greatest lower bound of `sup` considering all `concepts`?
240 fun is_lower_neighbour(sub, sup: FormalConcept[O, A], concepts: Set[FormalConcept[O, A]]): Bool
241 do
242 if sub == sup then return false
243 if not sub.is_subconcept(sup) then return false
244 for concept in concepts do
245 if sub == concept then continue
246 if sup == concept then continue
247 if not sub.is_subconcept(concept) then continue
248 if not concept.is_subconcept(sup) then continue
249 return false
250 end
251 return true
252 end
253 end