From: Jean Privat Date: Sat, 8 Nov 2014 03:35:14 +0000 (-0500) Subject: lib: add combinations.nit for combinatoric collections X-Git-Tag: v0.6.11~44^2~2 X-Git-Url: http://nitlanguage.org lib: add combinations.nit for combinatoric collections Signed-off-by: Jean Privat --- diff --git a/lib/combinations.nit b/lib/combinations.nit new file mode 100644 index 0000000..4076c03 --- /dev/null +++ b/lib/combinations.nit @@ -0,0 +1,439 @@ +# This file is part of NIT ( http://www.nitlanguage.org ). +# +# This file is free software, which comes along with NIT. This software is +# distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. You can modify it is you want, provided this header +# is kept unaltered, and a notification of the changes is added. +# You are allowed to redistribute it and sell it, alone or is a part of +# another product. + +# Cartesian products, combinations and permutation on collections. +# +# This module offers memory-efficient views on combinatoric collections. +# Methods of the views create objects only when needed. +# Moreover, produced objects during iterations are free to be collected and +# their memory reused. +# +# This enable these views and method to works with very combinatoric large collections. +# +# When small combinatoric views need to be kept in memory (for fast access by example). +# The `Collection::to_a` method and other related factories can be used to transform +# the combinatoric views into extensive collections, +module combinations + +redef class Collection[E] + # Cartesian product, over `r` times `self`. + # + # See `CartesianCollection` for details. + # + # FIXME: Cannot be used if RTA is enabled. So `niti` or `--erasure` only. + fun product(r: Int): Collection[SequenceRead[E]] + do + return new CartesianCollection[E]([self]*r) + end + + # All `r`-length permutations on self (all possible ordering) without repeated elements. + # + # See `CartesianCollection` for details. + # + # FIXME: Cannot be used if RTA is enabled. So `niti` or `--erasure` only. + fun permutations(r: Int): Collection[SequenceRead[E]] + do + var res = new CombinationCollection[E](self, r) + res.are_sorted = false + res.are_unique = true + return res + end + + # All `r`-length combinations on self (in same order) without repeated elements. + # + # See `CartesianCollection` for details. + # + # FIXME: Cannot be used if RTA is enabled. So `niti` or `--erasure` only. + fun combinations(r: Int): Collection[SequenceRead[E]] + do + var res = new CombinationCollection[E](self, r) + res.are_sorted = true + res.are_unique = true + return res + end + + # All `r`-length combination on self (in same order) with repeated elements. + # + # See `CartesianCollection` for details. + # + # FIXME: Cannot be used if RTA is enabled. So `niti` or `--erasure` only. + fun combinations_with_replacement(r: Int): Collection[SequenceRead[E]] + do + var res = new CombinationCollection[E](self, r) + res.are_sorted = true + res.are_unique = false + return res + end +end + +# A view of a Cartesian-product collection over homogeneous collections. +# +# Therefore, this view *generates* all the sequences of elements constructed by associating +# en element for each one of the original collections. +# +# It is equivalent to doing nesting `for` for each collection. +# +# ~~~~ +# var xs = [1, 2, 3] +# var ys = [8, 9] +# var xys = new CartesianCollection[Int]([xs, ys]) +# assert xys.length == 6 +# assert xys.to_a == [[1,8], [1,9], [2,8], [2,9], [3,8], [3,9]] +# ~~~~ +# +# The pattern of the generate sequences produces a lexicographical order. +# +# Because it is a generator, it is memory-efficient and the sequences are created only when needed. +# +# Note: because it is a view, changes on the base collections are reflected on the view. +# +# ~~~~ +# assert xs.pop == 3 +# assert ys.pop == 9 +# assert xys.to_a == [[1,8], [2,8]] +# ~~~~ +class CartesianCollection[E] + super Collection[SequenceRead[E]] + + # The base collections used to generate the sequences. + var collections: SequenceRead[Collection[E]] + + redef fun length + do + var res = 1 + for c in collections do res = res * c.length + return res + end + + redef fun iterator do return new CartesianIterator[E](self) +end + +private class CartesianIterator[E] + super Iterator[SequenceRead[E]] + var collection: CartesianCollection[E] + + # The array of iterations that will be increased in the lexicographic order. + private var iterators = new Array[Iterator[E]] + + init + do + for c in collection.collections do + var it = c.iterator + iterators.add it + if not it.is_ok then is_ok = false + end + end + + redef var is_ok = true + + redef fun item + do + var len = iterators.length + var res = new Array[E].with_capacity(len) + for i in [0..len[ do + var it = iterators[i] + res.add(it.item) + end + return res + end + + redef fun next + do + var rank = iterators.length - 1 + + # Odometer-like increment starting from the last iterator + loop + var it = iterators[rank] + it.next + if it.is_ok then return + + # The iterator if over + if rank == 0 then + # It it is the first, then the whole thing is over + is_ok = false + return + end + + # If not, restart the iterator and increment the previous one + # (like a carry) + iterators[rank] = collection.collections[rank].iterator + rank -= 1 + end + end +end + +# A view of some combinations over a base collections. +# +# This view *generates* some combinations and permutations on a collection. +# +# By default, the generated sequences are combinations: +# +# * each sequence has a length of `repeat` +# * elements are in sorted order (see `are_sorted` for details) +# * no repeated element (see `are_unique` for details) +# +# ~~~~ +# var xs = [1, 2, 3] +# var cxs = new CombinationCollection[Int](xs, 2) +# assert cxs.length == 3 +# assert cxs.to_a == [[1,2], [1,3], [2,3]] +# ~~~~ +# +# Other kind of combinations can be generated by tweaking the attributes `are_sorted` and `are_unique`. +# +# * for permutation: +# +# ~~~~ +# cxs.are_sorted = false +# cxs.are_unique = true +# assert cxs.length == 6 +# assert cxs.to_a == [[1,2], [1,3], [2,1], [2,3], [3,1], [3,2]] +# ~~~~ +# +# * for combinations with replacement: +# +# ~~~~ +# cxs.are_sorted = true +# cxs.are_unique = false +# assert cxs.length == 6 +# assert cxs.to_a == [[1,1], [1,2], [1,3], [2,2], [2,3], [3,3]] +# ~~~~ +# +# * for product: +# +# ~~~~ +# cxs.are_sorted = false +# cxs.are_unique = false +# assert cxs.length == 9 +# assert cxs.to_a == [[1,1], [1,2], [1,3], [2,1], [2,2], [2,3], [3,1], [3,2], [3,3]] +# ~~~~ +# +# However, in the last case, a faster alternative is to use `CartesianCollection`: +# +# ~~~~ +# var cp = new CartesianCollection[Int]([xs] * 2) +# assert cp.to_a == cxs.to_a +# ~~~~ +# +# As seen in the examples, the patterns of the generated sequences produce a lexicographical order. +# +# Because it is a generator, it is memory-efficient and the sequences are created only when needed. +# +# Note: because it is a view, changes on the base collection are reflected on the view. +# +# ~~~~ +# assert xs.pop == 3 +# cxs.are_sorted = true +# cxs.are_unique = true +# assert cxs.to_a == [[1,2]] +# ~~~~ +class CombinationCollection[E] + super Collection[SequenceRead[E]] + + # The base collection used to generate the sequences. + var collection: Collection[E] + + # The maximum length of each generated sequence. + var repeat: Int + + init + do + assert repeat >= 0 + end + + # Are the elements in the generated sequences sorted? + # Default `true`. + # + # When `true`, the original order is preserved. + # + # Elements are compared by their order in the base collection, + # not by their intrinsic value or comparability. + # + # ~~~~ + # var xs = [1, 1, 2] + # var cxs = new CombinationCollection[Int](xs, 2) + # cxs.are_sorted = true + # assert cxs.to_a == [[1,1], [1,2], [1, 2]] + # cxs.are_sorted = false + # assert cxs.to_a == [[1,1], [1,2], [1, 1], [1, 2], [2, 1], [2, 1]] + # ~~~~ + var are_sorted = true is writable + + # Are the element in the generated sequence unique? + # Default `true`. + # + # When `true`, an element cannot be reused in the same sequence (no replacement). + # + # Elements are distinguished by their order in the base collection, + # not by their intrinsic value or equality. + # + # ~~~~ + # var xs = [1, 1, 2] + # var cxs = new CombinationCollection[Int](xs, 2) + # cxs.are_unique = true + # assert cxs.to_a == [[1,1], [1,2], [1, 2]] + # cxs.are_unique = false + # assert cxs.to_a == [[1,1], [1,1], [1,2], [1,1], [1,2], [2,2]] + # ~~~~ + var are_unique = true is writable + + redef fun length + do + var n = collection.length + if are_unique then + if repeat > n then + return 0 + end + if are_sorted then + return n.factorial / repeat.factorial + else + return n.factorial / (n-repeat).factorial + end + else + if are_sorted then + return (n+repeat-1).factorial / repeat.factorial / (n-1).factorial + else + return n ** repeat + end + end + end + + redef fun iterator do + return new CombinationIterator[E](self) + end +end + +private class CombinationIterator[E] + super Iterator[SequenceRead[E]] + var product: CombinationCollection[E] + + private var iterators = new Array[Iterator[E]] + private var indices = new Array[Int] + + var are_sorted: Bool is noinit + var are_unique: Bool is noinit + + init + do + are_sorted = product.are_sorted + are_unique = product.are_unique + + for rank in [0..product.repeat[ do + reset_iterator(rank) + end + end + + redef var is_ok = true + + redef fun item + do + var len = product.repeat + var res = new Array[E].with_capacity(len) + for i in [0..len[ do + var it = iterators[i] + res.add(it.item) + end + return res + end + + redef fun next + do + var rank = product.repeat - 1 + + loop + var it = iterators[rank] + + if are_unique and not are_sorted then + var idx = indices[rank] + 1 + it.next + var adv = next_free(rank, idx) + for i in [idx..adv[ do it.next + indices[rank] = adv + else + it.next + indices[rank] += 1 + end + + if it.is_ok then break + if rank == 0 then + is_ok = false + return + end + rank -= 1 + end + + for r in [rank+1..product.repeat[ do + reset_iterator(r) + end + end + + private fun next_free(rank: Int, start: Int): Int + do + loop + for i in [0..rank[ do + if indices[i] == start then + start += 1 + continue label + end + end + break label + end label + return start + end + + private fun reset_iterator(rank: Int): Iterator[E] + do + var it = product.collection.iterator + iterators[rank] = it + var skip = 0 + + if (not are_sorted and not are_unique) or rank == 0 then + # DO NOTHING + else if are_sorted and are_unique then + skip = indices[rank-1] + 1 + else if are_sorted then + skip = indices[rank-1] + else + skip = next_free(rank, 0) + end + + for i in [0..skip[ do it.next + indices[rank] = skip + if not it.is_ok then is_ok = false + return it + end + + fun need_skip: Bool + do + if not are_sorted and not are_unique then + return false + else if are_sorted and are_unique then + var max = -1 + for i in indices do + if i <= max then return true + max = i + end + return false + else if are_sorted then + var max = -1 + for i in indices do + if i < max then return true + max = i + end + return false + else + # are_unique + for i in indices do + if indices.count(i) > 1 then return true + end + return false + end + end +end