Formal Concept Analysis

Formal concept analysis (FCA) is a principled way of deriving a concept hierarchy or formal ontology from a collection of objects and their properties. This means deriving implicit relationships between objects regarding their attributes.

Each concept in the hierarchy represents the set of objects sharing the same values for a certain set of properties; and each sub-concept in the hierarchy contains a subset of the objects in the concepts above it.

Formal concept analysis finds practical application in fields including data mining, text mining, machine learning or semantic web.

Building a FormalContext

We use the example from https://en.wikipedia.org/wiki/Formal_concept_analysis:

var fc = new FormalContext[Int, String]
fc.set_object_attributes(1, ["odd", "square"])
fc.set_object_attributes(2, ["even", "prime"])
fc.set_object_attributes(3, ["odd", "prime"])
fc.set_object_attributes(4, ["even", "composite", "square"])
fc.set_object_attributes(5, ["odd", "prime"])
fc.set_object_attributes(6, ["even", "composite"])
fc.set_object_attributes(7, ["odd", "prime"])
fc.set_object_attributes(8, ["even", "composite"])
fc.set_object_attributes(9, ["odd", "square", "composite"])
fc.set_object_attributes(10, ["even", "composite"])

Computing the set of FormalConcept

var concepts = fc.formal_concepts
for concept in concepts do
    print concept
end

Visualizing formal concept with ConceptLattice

var cl = new ConceptLattice[Int, String].from_concepts(concepts)
cl.show_dot

Introduced classes

class ConceptLattice[O: Object, A: Object]

fca :: ConceptLattice

Concept Lattice
class FormalConcept[O: Object, A: Object]

fca :: FormalConcept

Formal Concept
class FormalContext[O: Object, A: Object]

fca :: FormalContext

Formal Context

All class definitions

class ConceptLattice[O: Object, A: Object]

fca $ ConceptLattice

Concept Lattice
class FormalConcept[O: Object, A: Object]

fca $ FormalConcept

Formal Concept
class FormalContext[O: Object, A: Object]

fca $ FormalContext

Formal Context
package_diagram fca::fca fca poset poset fca::fca->poset serialization serialization poset->serialization ...serialization ... ...serialization->serialization a_star-m a_star-m a_star-m->fca::fca

Ancestors

module abstract_collection

core :: abstract_collection

Abstract collection classes and services.
module abstract_text

core :: abstract_text

Abstract class for manipulation of sequences of characters
module array

core :: array

This module introduces the standard array structure.
module bitset

core :: bitset

Services to handle BitSet
module bytes

core :: bytes

Services for byte streams and arrays
module circular_array

core :: circular_array

Efficient data structure to access both end of the sequence.
module codec_base

core :: codec_base

Base for codecs to use with streams
module codecs

core :: codecs

Group module for all codec-related manipulations
module collection

core :: collection

This module define several collection classes.
module core

core :: core

Standard classes and methods used by default by Nit programs and libraries.
module environ

core :: environ

Access to the environment variables of the process
module error

core :: error

Standard error-management infrastructure.
module exec

core :: exec

Invocation and management of operating system sub-processes.
module file

core :: file

File manipulations (create, read, write, etc.)
module fixed_ints

core :: fixed_ints

Basic integers of fixed-precision
module fixed_ints_text

core :: fixed_ints_text

Text services to complement fixed_ints
module flat

core :: flat

All the array-based text representations
module gc

core :: gc

Access to the Nit internal garbage collection mechanism
module hash_collection

core :: hash_collection

Introduce HashMap and HashSet.
module iso8859_1

core :: iso8859_1

Codec for ISO8859-1 I/O
module kernel

core :: kernel

Most basic classes and methods.
module list

core :: list

This module handle double linked lists
module math

core :: math

Mathematical operations
module meta

meta :: meta

Simple user-defined meta-level to manipulate types of instances as object.
module native

core :: native

Native structures for text and bytes
module numeric

core :: numeric

Advanced services for Numeric types
module protocol

core :: protocol

module queue

core :: queue

Queuing data structures and wrappers
module range

core :: range

Module for range of discrete objects.
module re

core :: re

Regular expression support for all services based on Pattern
module ropes

core :: ropes

Tree-based representation of a String.
module serialization_core

serialization :: serialization_core

Abstract services to serialize Nit objects to different formats
module sorter

core :: sorter

This module contains classes used to compare things and sorts arrays.
module stream

core :: stream

Input and output streams of characters
module text

core :: text

All the classes and methods related to the manipulation of text entities
module time

core :: time

Management of time and dates
module union_find

core :: union_find

union–find algorithm using an efficient disjoint-set data structure
module utf8

core :: utf8

Codec for UTF-8 I/O

Parents

module poset

poset :: poset

Pre order sets and partial order set (ie hierarchies)

Children

module a_star-m

a_star-m

# Formal Concept Analysis
#
# Formal concept analysis (FCA) is a principled way of deriving a concept hierarchy
# or formal ontology from a collection of objects and their properties.
# This means deriving implicit relationships between objects regarding their
# attributes.
#
# Each concept in the hierarchy represents the set of objects sharing the same
# values for a certain set of properties; and each sub-concept in the hierarchy
# contains a subset of the objects in the concepts above it.
#
# Formal concept analysis finds practical application in fields including data
# mining, text mining, machine learning or semantic web.
#
# ## Building a FormalContext
#
# We use the example from https://en.wikipedia.org/wiki/Formal_concept_analysis:
#
# ~~~
# var fc = new FormalContext[Int, String]
# fc.set_object_attributes(1, ["odd", "square"])
# fc.set_object_attributes(2, ["even", "prime"])
# fc.set_object_attributes(3, ["odd", "prime"])
# fc.set_object_attributes(4, ["even", "composite", "square"])
# fc.set_object_attributes(5, ["odd", "prime"])
# fc.set_object_attributes(6, ["even", "composite"])
# fc.set_object_attributes(7, ["odd", "prime"])
# fc.set_object_attributes(8, ["even", "composite"])
# fc.set_object_attributes(9, ["odd", "square", "composite"])
# fc.set_object_attributes(10, ["even", "composite"])
# ~~~
#
# ## Computing the set of FormalConcept
#
# ~~~
# var concepts = fc.formal_concepts
# for concept in concepts do
#	print concept
# end
# ~~~
#
# ## Visualizing formal concept with ConceptLattice
#
# ~~~nitish
# var cl = new ConceptLattice[Int, String].from_concepts(concepts)
# cl.show_dot
# ~~~
module fca

import poset

# Formal Context
#
# A formal context is a triple *K = (G, M, I)*, where *G* is a set of `objects`,
# *M* is a set of `attributes`, and *I ⊆ G × M* is a binary relation called incidence
# that expresses which objects have which attributes (`objects_attributes`).
#
# Predicate *gIm* designates object *g*'s having attribute *m*.
# For a subset *A ⊆ G* of objects and a subset *B ⊆ M* of attributes, one defines
# two derivation operators as follows:
#
# * *A' = {m ∈ M | ∀ g ∈ A, gIm}*, and dually
# * *B' = {g ∈ G | ∀ m ∈ B, gIm}*.
class FormalContext[O: Object, A: Object]

	# Objects in the context
	var objects = new HashSet[O]

	# Attributes considered to build concepts
	var attributes = new HashSet[A]

	# Association between objects and attributes
	var objects_attributes = new HashMap[O, Set[A]]

	# Associate a set of `attributes` to `object`
	fun set_object_attributes(object: O, attributes: Collection[A]) do
		for attribute in attributes do
			set_object_attribute(object, attribute)
		end
	end

	# Associate an `attribute` to `object`
	fun set_object_attribute(object: O, attribute: A) do
		attributes.add attribute
		objects.add object
		if not objects_attributes.has_key(object) then
			objects_attributes[object] = new HashSet[A]
		end
		objects_attributes[object].add attribute
	end

	# Derive the set of formal concepts from the objects and attributes
	fun formal_concepts: Set[FormalConcept[O, A]] do
		# black magic!

		var concepts = new HashSet[FormalConcept[O, A]]

		var extentsByAttr = new HashMap[Set[A], Set[O]]
		for attribute in attributes do
			var ka = new HashSet[A].from([attribute])
			extentsByAttr[ka] = new HashSet[O]
			for object in objects do
				if not objects_attributes[object].has(attribute) then continue
				extentsByAttr[ka].add(object)
			end
		end

		var nextents = new HashMap[Set[A], Set[O]]
		for k1, v1 in extentsByAttr do
			for k2, v2 in extentsByAttr do
				if k1 == k2 then continue
				var n = v1.intersection(v2)
				if extentsByAttr.values.has(n) then continue
				var ka = k1.union(k2)
				nextents[ka] = n
			end
		end
		extentsByAttr.add_all nextents

		var contained = true
		for k1, v1 in extentsByAttr do
			if not contained then break
			for k2, v2 in extentsByAttr do
				if k1 == k2 then continue
				var n = v1.intersection(v2)
				if extentsByAttr.values.has(n) then continue
				contained = false
				break
			end
		end

		if contained then
			extentsByAttr[new HashSet[A]] = new HashSet[O].from(objects)
		end

		var extents = new HashSet[Set[O]]
		for objects in extentsByAttr.values do
			extents.add objects
		end

		for extent in extents do
			var intents: Set[A] = new HashSet[A]
			var count = 0
			var cl = new FormalConcept[O, A]
			if extent.is_empty then
				intents.add_all(attributes)
			else
				for object in objects do
					if not extent.has(object) then continue
					var prev = objects_attributes[object]
					if count > 0 then
						intents = prev.intersection(intents)
					else
						intents = prev
					end
					count += 1
					cl.objects.add(object)
				end
			end
			cl.attributes.add_all intents
			concepts.add cl
		end

		return concepts
	end
end

# Formal Concept
#
# A pair *(A,B)* is a formal concept of a FormalContext *(G, M, I)* provided that:
#
# * *A ⊆ G*,
# * *B ⊆ M*,
# * *A′ = B*, and
# * *B′ = A*.
#
# Equivalently and more intuitively, *(A,B)* is a formal concept precisely when:
#
# * every object in *A* has every attribute in *B*,
# * for every object in *G* that is not in *A*, there is some attribute in *B* that
#   the object does not have,
# * for every attribute in *M* that is not in *B*, there is some object in *A*
#   that does not have that attribute.
class FormalConcept[O: Object, A: Object]

	# Concept attributes
	var attributes = new HashSet[A]

	# Concept objects
	var objects = new HashSet[O]

	# Is `self` a subconcept of `super_concept`?
	#
	# A concept C1 is a subconcept of C2 if C2 has all the objects of C1.
	fun is_subconcept(super_concept: FormalConcept[O, A]): Bool do
		if self == super_concept then return false
		if objects.length > super_concept.objects.length then return false
		return super_concept.objects.has_all(objects)
	end

	redef fun to_s do return "{attributes}\n{objects}"
end

# Concept Lattice
#
# Formal concepts are partially ordered with regard to inclusion of their extents
# or (which is equivalent) inverse inclusion of their intent.
class ConceptLattice[O: Object, A: Object]
	super POSet[FormalConcept[O, A]]

	# Build `self` from a set of formal `concepts`.
	init from_concepts(concepts: Set[FormalConcept[O, A]]) do
		for c in concepts do
			add_node c
		end
		for c1 in concepts do
			for c2 in concepts do
				if c1 == c2 then continue
				if not is_lower_neighbour(c1, c2, concepts) then continue
				add_edge(c2, c1)
			end
		end
	end

	# Is `sub` the greatest lower bound of `sup` considering all `concepts`?
	fun is_lower_neighbour(sub, sup: FormalConcept[O, A], concepts: Set[FormalConcept[O, A]]): Bool
	do
		if sub == sup then return false
		if not sub.is_subconcept(sup) then return false
		for concept in concepts do
			if sub == concept then continue
			if sup == concept then continue
			if not sub.is_subconcept(concept) then continue
			if not concept.is_subconcept(sup) then continue
			return false
		end
		return true
	end
end
lib/fca/fca.nit:15,1--253,3