1 # This file is part of NIT (http://www.nitlanguage.org).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Formal Concept Analysis
17 # Formal concept analysis (FCA) is a principled way of deriving a concept hierarchy
18 # or formal ontology from a collection of objects and their properties.
19 # This means deriving implicit relationships between objects regarding their
22 # Each concept in the hierarchy represents the set of objects sharing the same
23 # values for a certain set of properties; and each sub-concept in the hierarchy
24 # contains a subset of the objects in the concepts above it.
26 # Formal concept analysis finds practical application in fields including data
27 # mining, text mining, machine learning or semantic web.
29 # ## Building a FormalContext
31 # We use the example from https://en.wikipedia.org/wiki/Formal_concept_analysis:
34 # var fc = new FormalContext[Int, String]
35 # fc.set_object_attributes(1, ["odd", "square"])
36 # fc.set_object_attributes(2, ["even", "prime"])
37 # fc.set_object_attributes(3, ["odd", "prime"])
38 # fc.set_object_attributes(4, ["even", "composite", "square"])
39 # fc.set_object_attributes(5, ["odd", "prime"])
40 # fc.set_object_attributes(6, ["even", "composite"])
41 # fc.set_object_attributes(7, ["odd", "prime"])
42 # fc.set_object_attributes(8, ["even", "composite"])
43 # fc.set_object_attributes(9, ["odd", "square", "composite"])
44 # fc.set_object_attributes(10, ["even", "composite"])
47 # ## Computing the set of FormalConcept
50 # var concepts = fc.formal_concepts
51 # for concept in concepts do
56 # ## Visualizing formal concept with ConceptLattice
59 # var cl = new ConceptLattice[Int, String].from_concepts(concepts)
68 # A formal context is a triple *K = (G, M, I)*, where *G* is a set of `objects`,
69 # *M* is a set of `attributes`, and *I ⊆ G × M* is a binary relation called incidence
70 # that expresses which objects have which attributes (`objects_attributes`).
72 # Predicate *gIm* designates object *g*'s having attribute *m*.
73 # For a subset *A ⊆ G* of objects and a subset *B ⊆ M* of attributes, one defines
74 # two derivation operators as follows:
76 # * *A' = {m ∈ M | ∀ g ∈ A, gIm}*, and dually
77 # * *B' = {g ∈ G | ∀ m ∈ B, gIm}*.
78 class FormalContext[O
: Object, A
: Object]
80 # Objects in the context
81 var objects
= new HashSet[O
]
83 # Attributes considered to build concepts
84 var attributes
= new HashSet[A
]
86 # Association between objects and attributes
87 var objects_attributes
= new HashMap[O
, Set[A
]]
89 # Associate a set of `attributes` to `object`
90 fun set_object_attributes
(object
: O
, attributes
: Collection[A
]) do
91 for attribute
in attributes
do
92 set_object_attribute
(object
, attribute
)
96 # Associate an `attribute` to `object`
97 fun set_object_attribute
(object
: O
, attribute
: A
) do
98 attributes
.add attribute
100 if not objects_attributes
.has_key
(object
) then
101 objects_attributes
[object
] = new HashSet[A
]
103 objects_attributes
[object
].add attribute
106 # Derive the set of formal concepts from the objects and attributes
107 fun formal_concepts
: Set[FormalConcept[O
, A
]] do
110 var concepts
= new HashSet[FormalConcept[O
, A
]]
112 var extentsByAttr
= new HashMap[Set[A
], Set[O
]]
113 for attribute
in attributes
do
114 var ka
= new HashSet[A
].from
([attribute
])
115 extentsByAttr
[ka
] = new HashSet[O
]
116 for object
in objects
do
117 if not objects_attributes
[object
].has
(attribute
) then continue
118 extentsByAttr
[ka
].add
(object
)
122 var nextents
= new HashMap[Set[A
], Set[O
]]
123 for k1
, v1
in extentsByAttr
do
124 for k2
, v2
in extentsByAttr
do
125 if k1
== k2
then continue
126 var n
= v1
.intersection
(v2
)
127 if extentsByAttr
.values
.has
(n
) then continue
128 var ka
= k1
.union
(k2
)
132 extentsByAttr
.add_all nextents
135 for k1
, v1
in extentsByAttr
do
136 if not contained
then break
137 for k2
, v2
in extentsByAttr
do
138 if k1
== k2
then continue
139 var n
= v1
.intersection
(v2
)
140 if extentsByAttr
.values
.has
(n
) then continue
147 extentsByAttr
[new HashSet[A
]] = new HashSet[O
].from
(objects
)
150 var extents
= new HashSet[Set[O
]]
151 for objects
in extentsByAttr
.values
do
155 for extent
in extents
do
156 var intents
: Set[A
] = new HashSet[A
]
158 var cl
= new FormalConcept[O
, A
]
159 if extent
.is_empty
then
160 intents
.add_all
(attributes
)
162 for object
in objects
do
163 if not extent
.has
(object
) then continue
164 var prev
= objects_attributes
[object
]
166 intents
= prev
.intersection
(intents
)
171 cl
.objects
.add
(object
)
174 cl
.attributes
.add_all intents
184 # A pair *(A,B)* is a formal concept of a FormalContext *(G, M, I)* provided that:
191 # Equivalently and more intuitively, *(A,B)* is a formal concept precisely when:
193 # * every object in *A* has every attribute in *B*,
194 # * for every object in *G* that is not in *A*, there is some attribute in *B* that
195 # the object does not have,
196 # * for every attribute in *M* that is not in *B*, there is some object in *A*
197 # that does not have that attribute.
198 class FormalConcept[O
: Object, A
: Object]
201 var attributes
= new HashSet[A
]
204 var objects
= new HashSet[O
]
206 # Is `self` a subconcept of `super_concept`?
208 # A concept C1 is a subconcept of C2 if C2 has all the objects of C1.
209 fun is_subconcept
(super_concept
: FormalConcept[O
, A
]): Bool do
210 if self == super_concept
then return false
211 if objects
.length
> super_concept
.objects
.length
then return false
212 return super_concept
.objects
.has_all
(objects
)
215 redef fun to_s
do return "{attributes}\n{objects}"
220 # Formal concepts are partially ordered with regard to inclusion of their extents
221 # or (which is equivalent) inverse inclusion of their intent.
222 class ConceptLattice[O
: Object, A
: Object]
223 super POSet[FormalConcept[O
, A
]]
225 # Build `self` from a set of formal `concepts`.
226 init from_concepts
(concepts
: Set[FormalConcept[O
, A
]]) do
230 for c1
in concepts
do
231 for c2
in concepts
do
232 if c1
== c2
then continue
233 if not is_lower_neighbour
(c1
, c2
, concepts
) then continue
239 # Is `sub` the greatest lower bound of `sup` considering all `concepts`?
240 fun is_lower_neighbour
(sub
, sup
: FormalConcept[O
, A
], concepts
: Set[FormalConcept[O
, A
]]): Bool
242 if sub
== sup
then return false
243 if not sub
.is_subconcept
(sup
) then return false
244 for concept
in concepts
do
245 if sub
== concept
then continue
246 if sup
== concept
then continue
247 if not sub
.is_subconcept
(concept
) then continue
248 if not concept
.is_subconcept
(sup
) then continue