Memory-efficient Cartesian products, combinations and permutation on collections.

This module offers memory-efficient views on combinatoric collections. Methods of the views create objects only when needed. Moreover, produced objects during iterations are free to be collected and their memory reused.

This enable these views and method to works with very combinatoric large collections.

When small combinatoric views need to be kept in memory (for fast access by example). The Collection::to_a method and other related factories can be used to transform the combinatoric views into extensive collections,

Introduced classes

class CartesianCollection[E: nullable Object]

combinations :: CartesianCollection

A view of a Cartesian-product collection over homogeneous collections.
class CombinationCollection[E: nullable Object]

combinations :: CombinationCollection

A view of some combinations over a base collections.

Redefined classes

redef interface Collection[E: nullable Object]

combinations :: combinations $ Collection

The root of the collection hierarchy.

All class definitions

class CartesianCollection[E: nullable Object]

combinations $ CartesianCollection

A view of a Cartesian-product collection over homogeneous collections.
redef interface Collection[E: nullable Object]

combinations :: combinations $ Collection

The root of the collection hierarchy.
class CombinationCollection[E: nullable Object]

combinations $ CombinationCollection

A view of some combinations over a base collections.
package_diagram combinations::combinations combinations core core combinations::combinations->core crapto::xor xor crapto::xor->combinations::combinations crapto::crapto crapto crapto::crapto->crapto::xor crapto::crapto... ... crapto::crapto...->crapto::crapto

Ancestors

module abstract_collection

core :: abstract_collection

Abstract collection classes and services.
module abstract_text

core :: abstract_text

Abstract class for manipulation of sequences of characters
module array

core :: array

This module introduces the standard array structure.
module bitset

core :: bitset

Services to handle BitSet
module bytes

core :: bytes

Services for byte streams and arrays
module circular_array

core :: circular_array

Efficient data structure to access both end of the sequence.
module codec_base

core :: codec_base

Base for codecs to use with streams
module codecs

core :: codecs

Group module for all codec-related manipulations
module collection

core :: collection

This module define several collection classes.
module environ

core :: environ

Access to the environment variables of the process
module error

core :: error

Standard error-management infrastructure.
module exec

core :: exec

Invocation and management of operating system sub-processes.
module file

core :: file

File manipulations (create, read, write, etc.)
module fixed_ints

core :: fixed_ints

Basic integers of fixed-precision
module fixed_ints_text

core :: fixed_ints_text

Text services to complement fixed_ints
module flat

core :: flat

All the array-based text representations
module gc

core :: gc

Access to the Nit internal garbage collection mechanism
module hash_collection

core :: hash_collection

Introduce HashMap and HashSet.
module iso8859_1

core :: iso8859_1

Codec for ISO8859-1 I/O
module kernel

core :: kernel

Most basic classes and methods.
module list

core :: list

This module handle double linked lists
module math

core :: math

Mathematical operations
module native

core :: native

Native structures for text and bytes
module numeric

core :: numeric

Advanced services for Numeric types
module protocol

core :: protocol

module queue

core :: queue

Queuing data structures and wrappers
module range

core :: range

Module for range of discrete objects.
module re

core :: re

Regular expression support for all services based on Pattern
module ropes

core :: ropes

Tree-based representation of a String.
module sorter

core :: sorter

This module contains classes used to compare things and sorts arrays.
module stream

core :: stream

Input and output streams of characters
module text

core :: text

All the classes and methods related to the manipulation of text entities
module time

core :: time

Management of time and dates
module union_find

core :: union_find

union–find algorithm using an efficient disjoint-set data structure
module utf8

core :: utf8

Codec for UTF-8 I/O

Parents

module core

core :: core

Standard classes and methods used by default by Nit programs and libraries.

Children

module xor

crapto :: xor

Cryptographic attacks and utilities for XOR-based algorithms.

Descendants

module a_star-m

a_star-m

module crapto

crapto :: crapto

Cryptographic attacks and utilities.
# Memory-efficient Cartesian products, combinations and permutation on collections.
#
# This module offers memory-efficient views on combinatoric collections.
# Methods of the views create objects only when needed.
# Moreover, produced objects during iterations are free to be collected and
# their memory reused.
#
# This enable these views and method to works with very combinatoric large collections.
#
# When small combinatoric views need to be kept in memory (for fast access by example).
# The `Collection::to_a` method and other related factories can be used to transform
# the combinatoric views into extensive collections,
module combinations

redef class Collection[E]
	# Cartesian product, over `r` times `self`.
	#
	# See `CartesianCollection` for details.
	#
	# FIXME: Cannot be used if RTA is enabled. So `niti` or `--erasure` only.
	fun product(r: Int): Collection[SequenceRead[E]]
	do
		return new CartesianCollection[E]([self]*r)
	end

	# All `r`-length permutations on self (all possible ordering) without repeated elements.
	#
	# See `CartesianCollection` for details.
	#
	# FIXME: Cannot be used if RTA is enabled. So `niti` or `--erasure` only.
	fun permutations(r: Int): Collection[SequenceRead[E]]
	do
		var res = new CombinationCollection[E](self, r)
		res.are_sorted = false
		res.are_unique = true
		return res
	end

	# All `r`-length combinations on self (in same order) without repeated elements.
	#
	# See `CartesianCollection` for details.
	#
	# FIXME: Cannot be used if RTA is enabled. So `niti` or `--erasure` only.
	fun combinations(r: Int): Collection[SequenceRead[E]]
	do
		var res = new CombinationCollection[E](self, r)
		res.are_sorted = true
		res.are_unique = true
		return res
	end

	# All `r`-length combination on self (in same order) with repeated elements.
	#
	# See `CartesianCollection` for details.
	#
	# FIXME: Cannot be used if RTA is enabled. So `niti` or `--erasure` only.
	fun combinations_with_replacement(r: Int): Collection[SequenceRead[E]]
	do
		var res = new CombinationCollection[E](self, r)
		res.are_sorted = true
		res.are_unique = false
		return res
	end
end

# A view of a Cartesian-product collection over homogeneous collections.
#
# Therefore, this view *generates* all the sequences of elements constructed by associating
# en element for each one of the original collections.
#
# It is equivalent to doing nesting `for` for each collection.
#
# ~~~~
# var xs = [1, 2, 3]
# var ys = [8, 9]
# var xys = new CartesianCollection[Int]([xs, ys])
# assert xys.length == 6
# assert xys.to_a == [[1,8], [1,9], [2,8], [2,9], [3,8], [3,9]]
# ~~~~
#
# The pattern of the generate sequences produces a lexicographical order.
#
# Because it is a generator, it is memory-efficient and the sequences are created only when needed.
#
# Note: because it is a view, changes on the base collections are reflected on the view.
#
# ~~~~
# assert xs.pop == 3
# assert ys.pop == 9
# assert xys.to_a == [[1,8], [2,8]]
# ~~~~
class CartesianCollection[E]
	super Collection[SequenceRead[E]]

	# The base collections used to generate the sequences.
	var collections: SequenceRead[Collection[E]]

	redef fun length
	do
		var res = 1
		for c in collections do res = res * c.length
		return res
	end

	redef fun iterator do return new CartesianIterator[E](self)
end

private class CartesianIterator[E]
	super Iterator[SequenceRead[E]]
	var collection: CartesianCollection[E]

	# The array of iterations that will be increased in the lexicographic order.
	var iterators = new Array[Iterator[E]]

	init
	do
		for c in collection.collections do
			var it = c.iterator
			iterators.add it
			if not it.is_ok then is_ok = false
		end
	end

	redef var is_ok = true

	redef fun item
	do
		var len = iterators.length
		var res = new Array[E].with_capacity(len)
		for i in [0..len[ do
			var it = iterators[i]
			res.add(it.item)
		end
		return res
	end

	redef fun next
	do
		var rank = iterators.length - 1

		# Odometer-like increment starting from the last iterator
		loop
			var it = iterators[rank]
			it.next
			if it.is_ok then return

			# The iterator if over
			if rank == 0 then
				# It it is the first, then the whole thing is over
				is_ok = false
				return
			end

			# If not, restart the iterator and increment the previous one
			# (like a carry)
			iterators[rank] = collection.collections[rank].iterator
			rank -= 1
		end
	end
end

# A view of some combinations over a base collections.
#
# This view *generates* some combinations and permutations on a collection.
#
# By default, the generated sequences are combinations:
#
#   * each sequence has a length of `repeat`
#   * elements are in sorted order (see `are_sorted` for details)
#   * no repeated element (see `are_unique` for details)
#
# ~~~~
# var xs = [1, 2, 3]
# var cxs = new CombinationCollection[Int](xs, 2)
# assert cxs.length == 3
# assert cxs.to_a == [[1,2], [1,3], [2,3]]
# ~~~~
#
# Other kind of combinations can be generated by tweaking the attributes `are_sorted` and `are_unique`.
#
# * for permutation:
#
# ~~~~
# cxs.are_sorted = false
# cxs.are_unique = true
# assert cxs.length == 6
# assert cxs.to_a == [[1,2], [1,3], [2,1], [2,3], [3,1], [3,2]]
# ~~~~
#
# * for combinations with replacement:
#
# ~~~~
# cxs.are_sorted = true
# cxs.are_unique = false
# assert cxs.length == 6
# assert cxs.to_a == [[1,1], [1,2], [1,3], [2,2], [2,3], [3,3]]
# ~~~~
#
# * for product:
#
# ~~~~
# cxs.are_sorted = false
# cxs.are_unique = false
# assert cxs.length == 9
# assert cxs.to_a == [[1,1], [1,2], [1,3], [2,1], [2,2], [2,3], [3,1], [3,2], [3,3]]
# ~~~~
#
# However, in the last case, a faster alternative is to use `CartesianCollection`:
#
# ~~~~
# var cp = new CartesianCollection[Int]([xs] * 2)
# assert cp.to_a == cxs.to_a
# ~~~~
#
# As seen in the examples, the patterns of the generated sequences produce a lexicographical order.
#
# Because it is a generator, it is memory-efficient and the sequences are created only when needed.
#
# Note: because it is a view, changes on the base collection are reflected on the view.
#
# ~~~~
# assert xs.pop == 3
# cxs.are_sorted = true
# cxs.are_unique = true
# assert cxs.to_a == [[1,2]]
# ~~~~
class CombinationCollection[E]
	super Collection[SequenceRead[E]]

	# The base collection used to generate the sequences.
	var collection: Collection[E]

	# The maximum length of each generated sequence.
	var repeat: Int

	init
	do
		assert repeat >= 0
	end

	# Are the elements in the generated sequences sorted?
	# Default `true`.
	#
	# When `true`, the original order is preserved.
	#
	# Elements are compared by their order in the base collection,
	# not by their intrinsic value or comparability.
	#
	# ~~~~
	# var xs = [1, 1, 2]
	# var cxs = new CombinationCollection[Int](xs, 2)
	# cxs.are_sorted = true
	# assert cxs.to_a == [[1,1], [1,2], [1, 2]]
	# cxs.are_sorted = false
	# assert cxs.to_a == [[1,1], [1,2], [1, 1], [1, 2], [2, 1], [2, 1]]
	# ~~~~
	var are_sorted = true is writable

	# Are the element in the generated sequence unique?
	# Default `true`.
	#
	# When `true`, an element cannot be reused in the same sequence (no replacement).
	#
	# Elements are distinguished by their order in the base collection,
	# not by their intrinsic value or equality.
	#
	# ~~~~
	# var xs = [1, 1, 2]
	# var cxs = new CombinationCollection[Int](xs, 2)
	# cxs.are_unique = true
	# assert cxs.to_a == [[1,1], [1,2], [1, 2]]
	# cxs.are_unique = false
	# assert cxs.to_a == [[1,1], [1,1], [1,2], [1,1], [1,2], [2,2]]
	# ~~~~
	var are_unique = true is writable

	redef fun length
	do
		var n = collection.length
		if are_unique then
			if repeat > n then
				return 0
			end
			if are_sorted then
				return n.factorial / repeat.factorial
			else
				return n.factorial / (n-repeat).factorial
			end
		else
			if are_sorted then
				return (n+repeat-1).factorial / repeat.factorial / (n-1).factorial
			else
				return n ** repeat
			end
		end
	end

	redef fun iterator do
		return new CombinationIterator[E](self)
	end
end

private class CombinationIterator[E]
	super Iterator[SequenceRead[E]]
	var product: CombinationCollection[E]

	var iterators = new Array[Iterator[E]]
	var indices = new Array[Int]

	var are_sorted: Bool is noinit
	var are_unique: Bool is noinit

	init
	do
		are_sorted = product.are_sorted
		are_unique = product.are_unique

		for rank in [0..product.repeat[ do
			reset_iterator(rank)
		end
	end

	redef var is_ok = true

	redef fun item
	do
		var len = product.repeat
		var res = new Array[E].with_capacity(len)
		for i in [0..len[ do
			var it = iterators[i]
			res.add(it.item)
		end
		return res
	end

	redef fun next
	do
		var rank = product.repeat - 1

		loop
			var it = iterators[rank]

			if are_unique and not are_sorted then
				var idx = indices[rank] + 1
				it.next
				var adv = next_free(rank, idx)
				for i in [idx..adv[ do it.next
				indices[rank] = adv
			else
				it.next
				indices[rank] += 1
			end

			if it.is_ok then break
			if rank == 0 then
				is_ok = false
				return
			end
			rank -= 1
		end

		for r in [rank+1..product.repeat[ do
			reset_iterator(r)
		end
	end

	fun next_free(rank: Int, start: Int): Int
	do
		loop
			for i in [0..rank[ do
				if indices[i] == start then
					start += 1
					continue label
				end
			end
			break label
		end label
		return start
	end

	fun reset_iterator(rank: Int): Iterator[E]
	do
		var it = product.collection.iterator
		iterators[rank] = it
		var skip = 0

		if (not are_sorted and not are_unique) or rank == 0 then
			# DO NOTHING
		else if are_sorted and are_unique then
			skip = indices[rank-1] + 1
		else if are_sorted then
			skip = indices[rank-1]
		else
			skip = next_free(rank, 0)
		end

		for i in [0..skip[ do it.next
		indices[rank] = skip
		if not it.is_ok then is_ok = false
		return it
	end

	fun need_skip: Bool
	do
		if not are_sorted and not are_unique then
			return false
		else if are_sorted and are_unique then
			var max = -1
			for i in indices do
				if i <= max then return true
				max = i
			end
			return false
		else if are_sorted then
			var max = -1
			for i in indices do
				if i < max then return true
				max = i
			end
			return false
		else
			# are_unique
			for i in indices do
				if indices.count(i) > 1 then return true
			end
			return false
		end
	end
end
lib/combinations/combinations.nit:11,1--439,3