lib: add combinations.nit for combinatoric collections
authorJean Privat <jean@pryen.org>
Sat, 8 Nov 2014 03:35:14 +0000 (22:35 -0500)
committerJean Privat <jean@pryen.org>
Mon, 10 Nov 2014 15:54:02 +0000 (10:54 -0500)
Signed-off-by: Jean Privat <jean@pryen.org>

lib/combinations.nit [new file with mode: 0644]

diff --git a/lib/combinations.nit b/lib/combinations.nit
new file mode 100644 (file)
index 0000000..4076c03
--- /dev/null
@@ -0,0 +1,439 @@
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# This file is free software, which comes along with NIT.  This software is
+# distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+# without  even  the implied warranty of  MERCHANTABILITY or  FITNESS FOR A
+# PARTICULAR PURPOSE.  You can modify it is you want,  provided this header
+# is kept unaltered, and a notification of the changes is added.
+# You  are  allowed  to  redistribute it and sell it, alone or is a part of
+# another product.
+
+# Cartesian products, combinations and permutation on collections.
+#
+# This module offers memory-efficient views on combinatoric collections.
+# Methods of the views create objects only when needed.
+# Moreover, produced objects during iterations are free to be collected and
+# their memory reused.
+#
+# This enable these views and method to works with very combinatoric large collections.
+#
+# When small combinatoric views need to be kept in memory (for fast access by example).
+# The `Collection::to_a` method and other related factories can be used to transform
+# the combinatoric views into extensive collections,
+module combinations
+
+redef class Collection[E]
+       # Cartesian product, over `r` times `self`.
+       #
+       # See `CartesianCollection` for details.
+       #
+       # FIXME: Cannot be used if RTA is enabled. So `niti` or `--erasure` only.
+       fun product(r: Int): Collection[SequenceRead[E]]
+       do
+               return new CartesianCollection[E]([self]*r)
+       end
+
+       # All `r`-length permutations on self (all possible ordering) without repeated elements.
+       #
+       # See `CartesianCollection` for details.
+       #
+       # FIXME: Cannot be used if RTA is enabled. So `niti` or `--erasure` only.
+       fun permutations(r: Int): Collection[SequenceRead[E]]
+       do
+               var res = new CombinationCollection[E](self, r)
+               res.are_sorted = false
+               res.are_unique = true
+               return res
+       end
+
+       # All `r`-length combinations on self (in same order) without repeated elements.
+       #
+       # See `CartesianCollection` for details.
+       #
+       # FIXME: Cannot be used if RTA is enabled. So `niti` or `--erasure` only.
+       fun combinations(r: Int): Collection[SequenceRead[E]]
+       do
+               var res = new CombinationCollection[E](self, r)
+               res.are_sorted = true
+               res.are_unique = true
+               return res
+       end
+
+       # All `r`-length combination on self (in same order) with repeated elements.
+       #
+       # See `CartesianCollection` for details.
+       #
+       # FIXME: Cannot be used if RTA is enabled. So `niti` or `--erasure` only.
+       fun combinations_with_replacement(r: Int): Collection[SequenceRead[E]]
+       do
+               var res = new CombinationCollection[E](self, r)
+               res.are_sorted = true
+               res.are_unique = false
+               return res
+       end
+end
+
+# A view of a Cartesian-product collection over homogeneous collections.
+#
+# Therefore, this view *generates* all the sequences of elements constructed by associating
+# en element for each one of the original collections.
+#
+# It is equivalent to doing nesting `for` for each collection.
+#
+# ~~~~
+# var xs = [1, 2, 3]
+# var ys = [8, 9]
+# var xys = new CartesianCollection[Int]([xs, ys])
+# assert xys.length == 6
+# assert xys.to_a == [[1,8], [1,9], [2,8], [2,9], [3,8], [3,9]]
+# ~~~~
+#
+# The pattern of the generate sequences produces a lexicographical order.
+#
+# Because it is a generator, it is memory-efficient and the sequences are created only when needed.
+#
+# Note: because it is a view, changes on the base collections are reflected on the view.
+#
+# ~~~~
+# assert xs.pop == 3
+# assert ys.pop == 9
+# assert xys.to_a == [[1,8], [2,8]]
+# ~~~~
+class CartesianCollection[E]
+       super Collection[SequenceRead[E]]
+
+       # The base collections used to generate the sequences.
+       var collections: SequenceRead[Collection[E]]
+
+       redef fun length
+       do
+               var res = 1
+               for c in collections do res = res * c.length
+               return res
+       end
+
+       redef fun iterator do return new CartesianIterator[E](self)
+end
+
+private class CartesianIterator[E]
+       super Iterator[SequenceRead[E]]
+       var collection: CartesianCollection[E]
+
+       # The array of iterations that will be increased in the lexicographic order.
+       private var iterators = new Array[Iterator[E]]
+
+       init
+       do
+               for c in collection.collections do
+                       var it = c.iterator
+                       iterators.add it
+                       if not it.is_ok then is_ok = false
+               end
+       end
+
+       redef var is_ok = true
+
+       redef fun item
+       do
+               var len = iterators.length
+               var res = new Array[E].with_capacity(len)
+               for i in [0..len[ do
+                       var it = iterators[i]
+                       res.add(it.item)
+               end
+               return res
+       end
+
+       redef fun next
+       do
+               var rank = iterators.length - 1
+
+               # Odometer-like increment starting from the last iterator
+               loop
+                       var it = iterators[rank]
+                       it.next
+                       if it.is_ok then return
+
+                       # The iterator if over
+                       if rank == 0 then
+                               # It it is the first, then the whole thing is over
+                               is_ok = false
+                               return
+                       end
+
+                       # If not, restart the iterator and increment the previous one
+                       # (like a carry)
+                       iterators[rank] = collection.collections[rank].iterator
+                       rank -= 1
+               end
+       end
+end
+
+# A view of some combinations over a base collections.
+#
+# This view *generates* some combinations and permutations on a collection.
+#
+# By default, the generated sequences are combinations:
+#
+#   * each sequence has a length of `repeat`
+#   * elements are in sorted order (see `are_sorted` for details)
+#   * no repeated element (see `are_unique` for details)
+#
+# ~~~~
+# var xs = [1, 2, 3]
+# var cxs = new CombinationCollection[Int](xs, 2)
+# assert cxs.length == 3
+# assert cxs.to_a == [[1,2], [1,3], [2,3]]
+# ~~~~
+#
+# Other kind of combinations can be generated by tweaking the attributes `are_sorted` and `are_unique`.
+#
+# * for permutation:
+#
+# ~~~~
+# cxs.are_sorted = false
+# cxs.are_unique = true
+# assert cxs.length == 6
+# assert cxs.to_a == [[1,2], [1,3], [2,1], [2,3], [3,1], [3,2]]
+# ~~~~
+#
+# * for combinations with replacement:
+#
+# ~~~~
+# cxs.are_sorted = true
+# cxs.are_unique = false
+# assert cxs.length == 6
+# assert cxs.to_a == [[1,1], [1,2], [1,3], [2,2], [2,3], [3,3]]
+# ~~~~
+#
+# * for product:
+#
+# ~~~~
+# cxs.are_sorted = false
+# cxs.are_unique = false
+# assert cxs.length == 9
+# assert cxs.to_a == [[1,1], [1,2], [1,3], [2,1], [2,2], [2,3], [3,1], [3,2], [3,3]]
+# ~~~~
+#
+# However, in the last case, a faster alternative is to use `CartesianCollection`:
+#
+# ~~~~
+# var cp = new CartesianCollection[Int]([xs] * 2)
+# assert cp.to_a == cxs.to_a
+# ~~~~
+#
+# As seen in the examples, the patterns of the generated sequences produce a lexicographical order.
+#
+# Because it is a generator, it is memory-efficient and the sequences are created only when needed.
+#
+# Note: because it is a view, changes on the base collection are reflected on the view.
+#
+# ~~~~
+# assert xs.pop == 3
+# cxs.are_sorted = true
+# cxs.are_unique = true
+# assert cxs.to_a == [[1,2]]
+# ~~~~
+class CombinationCollection[E]
+       super Collection[SequenceRead[E]]
+
+       # The base collection used to generate the sequences.
+       var collection: Collection[E]
+
+       # The maximum length of each generated sequence.
+       var repeat: Int
+
+       init
+       do
+               assert repeat >= 0
+       end
+
+       # Are the elements in the generated sequences sorted?
+       # Default `true`.
+       #
+       # When `true`, the original order is preserved.
+       #
+       # Elements are compared by their order in the base collection,
+       # not by their intrinsic value or comparability.
+       #
+       # ~~~~
+       # var xs = [1, 1, 2]
+       # var cxs = new CombinationCollection[Int](xs, 2)
+       # cxs.are_sorted = true
+       # assert cxs.to_a == [[1,1], [1,2], [1, 2]]
+       # cxs.are_sorted = false
+       # assert cxs.to_a == [[1,1], [1,2], [1, 1], [1, 2], [2, 1], [2, 1]]
+       # ~~~~
+       var are_sorted = true is writable
+
+       # Are the element in the generated sequence unique?
+       # Default `true`.
+       #
+       # When `true`, an element cannot be reused in the same sequence (no replacement).
+       #
+       # Elements are distinguished by their order in the base collection,
+       # not by their intrinsic value or equality.
+       #
+       # ~~~~
+       # var xs = [1, 1, 2]
+       # var cxs = new CombinationCollection[Int](xs, 2)
+       # cxs.are_unique = true
+       # assert cxs.to_a == [[1,1], [1,2], [1, 2]]
+       # cxs.are_unique = false
+       # assert cxs.to_a == [[1,1], [1,1], [1,2], [1,1], [1,2], [2,2]]
+       # ~~~~
+       var are_unique = true is writable
+
+       redef fun length
+       do
+               var n = collection.length
+               if are_unique then
+                       if repeat > n then
+                               return 0
+                       end
+                       if are_sorted then
+                               return n.factorial / repeat.factorial
+                       else
+                               return n.factorial / (n-repeat).factorial
+                       end
+               else
+                       if are_sorted then
+                               return (n+repeat-1).factorial / repeat.factorial / (n-1).factorial
+                       else
+                               return n ** repeat
+                       end
+               end
+       end
+
+       redef fun iterator do
+               return new CombinationIterator[E](self)
+       end
+end
+
+private class CombinationIterator[E]
+       super Iterator[SequenceRead[E]]
+       var product: CombinationCollection[E]
+
+       private var iterators = new Array[Iterator[E]]
+       private var indices = new Array[Int]
+
+       var are_sorted: Bool is noinit
+       var are_unique: Bool is noinit
+
+       init
+       do
+               are_sorted = product.are_sorted
+               are_unique = product.are_unique
+
+               for rank in [0..product.repeat[ do
+                       reset_iterator(rank)
+               end
+       end
+
+       redef var is_ok = true
+
+       redef fun item
+       do
+               var len = product.repeat
+               var res = new Array[E].with_capacity(len)
+               for i in [0..len[ do
+                       var it = iterators[i]
+                       res.add(it.item)
+               end
+               return res
+       end
+
+       redef fun next
+       do
+               var rank = product.repeat - 1
+
+               loop
+                       var it = iterators[rank]
+
+                       if are_unique and not are_sorted then
+                               var idx = indices[rank] + 1
+                               it.next
+                               var adv = next_free(rank, idx)
+                               for i in [idx..adv[ do it.next
+                               indices[rank] = adv
+                       else
+                               it.next
+                               indices[rank] += 1
+                       end
+
+                       if it.is_ok then break
+                       if rank == 0 then
+                               is_ok = false
+                               return
+                       end
+                       rank -= 1
+               end
+
+               for r in [rank+1..product.repeat[ do
+                       reset_iterator(r)
+               end
+       end
+
+       private fun next_free(rank: Int, start: Int): Int
+       do
+               loop
+                       for i in [0..rank[ do
+                               if indices[i] == start then
+                                       start += 1
+                                       continue label
+                               end
+                       end
+                       break label
+               end label
+               return start
+       end
+
+       private fun reset_iterator(rank: Int): Iterator[E]
+       do
+               var it = product.collection.iterator
+               iterators[rank] = it
+               var skip = 0
+
+               if (not are_sorted and not are_unique) or rank == 0 then
+                       # DO NOTHING
+               else if are_sorted and are_unique then
+                       skip = indices[rank-1] + 1
+               else if are_sorted then
+                       skip = indices[rank-1]
+               else
+                       skip = next_free(rank, 0)
+               end
+
+               for i in [0..skip[ do it.next
+               indices[rank] = skip
+               if not it.is_ok then is_ok = false
+               return it
+       end
+
+       fun need_skip: Bool
+       do
+               if not are_sorted and not are_unique then
+                       return false
+               else if are_sorted and are_unique then
+                       var max = -1
+                       for i in indices do
+                               if i <= max then return true
+                               max = i
+                       end
+                       return false
+               else if are_sorted then
+                       var max = -1
+                       for i in indices do
+                               if i < max then return true
+                               max = i
+                       end
+                       return false
+               else
+                       # are_unique
+                       for i in indices do
+                               if indices.count(i) > 1 then return true
+                       end
+                       return false
+               end
+       end
+end