cb1612ae90509563f5ff7e9a46cb9ae340c7b348
[nit.git] / lib / combinations.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # Cartesian products, combinations and permutation on collections.
12 #
13 # This module offers memory-efficient views on combinatoric collections.
14 # Methods of the views create objects only when needed.
15 # Moreover, produced objects during iterations are free to be collected and
16 # their memory reused.
17 #
18 # This enable these views and method to works with very combinatoric large collections.
19 #
20 # When small combinatoric views need to be kept in memory (for fast access by example).
21 # The `Collection::to_a` method and other related factories can be used to transform
22 # the combinatoric views into extensive collections,
23 module combinations
24
25 redef class Collection[E]
26 # Cartesian product, over `r` times `self`.
27 #
28 # See `CartesianCollection` for details.
29 #
30 # FIXME: Cannot be used if RTA is enabled. So `niti` or `--erasure` only.
31 fun product(r: Int): Collection[SequenceRead[E]]
32 do
33 return new CartesianCollection[E]([self]*r)
34 end
35
36 # All `r`-length permutations on self (all possible ordering) without repeated elements.
37 #
38 # See `CartesianCollection` for details.
39 #
40 # FIXME: Cannot be used if RTA is enabled. So `niti` or `--erasure` only.
41 fun permutations(r: Int): Collection[SequenceRead[E]]
42 do
43 var res = new CombinationCollection[E](self, r)
44 res.are_sorted = false
45 res.are_unique = true
46 return res
47 end
48
49 # All `r`-length combinations on self (in same order) without repeated elements.
50 #
51 # See `CartesianCollection` for details.
52 #
53 # FIXME: Cannot be used if RTA is enabled. So `niti` or `--erasure` only.
54 fun combinations(r: Int): Collection[SequenceRead[E]]
55 do
56 var res = new CombinationCollection[E](self, r)
57 res.are_sorted = true
58 res.are_unique = true
59 return res
60 end
61
62 # All `r`-length combination on self (in same order) with repeated elements.
63 #
64 # See `CartesianCollection` for details.
65 #
66 # FIXME: Cannot be used if RTA is enabled. So `niti` or `--erasure` only.
67 fun combinations_with_replacement(r: Int): Collection[SequenceRead[E]]
68 do
69 var res = new CombinationCollection[E](self, r)
70 res.are_sorted = true
71 res.are_unique = false
72 return res
73 end
74 end
75
76 # A view of a Cartesian-product collection over homogeneous collections.
77 #
78 # Therefore, this view *generates* all the sequences of elements constructed by associating
79 # en element for each one of the original collections.
80 #
81 # It is equivalent to doing nesting `for` for each collection.
82 #
83 # ~~~~
84 # var xs = [1, 2, 3]
85 # var ys = [8, 9]
86 # var xys = new CartesianCollection[Int]([xs, ys])
87 # assert xys.length == 6
88 # assert xys.to_a == [[1,8], [1,9], [2,8], [2,9], [3,8], [3,9]]
89 # ~~~~
90 #
91 # The pattern of the generate sequences produces a lexicographical order.
92 #
93 # Because it is a generator, it is memory-efficient and the sequences are created only when needed.
94 #
95 # Note: because it is a view, changes on the base collections are reflected on the view.
96 #
97 # ~~~~
98 # assert xs.pop == 3
99 # assert ys.pop == 9
100 # assert xys.to_a == [[1,8], [2,8]]
101 # ~~~~
102 class CartesianCollection[E]
103 super Collection[SequenceRead[E]]
104
105 # The base collections used to generate the sequences.
106 var collections: SequenceRead[Collection[E]]
107
108 redef fun length
109 do
110 var res = 1
111 for c in collections do res = res * c.length
112 return res
113 end
114
115 redef fun iterator do return new CartesianIterator[E](self)
116 end
117
118 private class CartesianIterator[E]
119 super Iterator[SequenceRead[E]]
120 var collection: CartesianCollection[E]
121
122 # The array of iterations that will be increased in the lexicographic order.
123 var iterators = new Array[Iterator[E]]
124
125 init
126 do
127 for c in collection.collections do
128 var it = c.iterator
129 iterators.add it
130 if not it.is_ok then is_ok = false
131 end
132 end
133
134 redef var is_ok = true
135
136 redef fun item
137 do
138 var len = iterators.length
139 var res = new Array[E].with_capacity(len)
140 for i in [0..len[ do
141 var it = iterators[i]
142 res.add(it.item)
143 end
144 return res
145 end
146
147 redef fun next
148 do
149 var rank = iterators.length - 1
150
151 # Odometer-like increment starting from the last iterator
152 loop
153 var it = iterators[rank]
154 it.next
155 if it.is_ok then return
156
157 # The iterator if over
158 if rank == 0 then
159 # It it is the first, then the whole thing is over
160 is_ok = false
161 return
162 end
163
164 # If not, restart the iterator and increment the previous one
165 # (like a carry)
166 iterators[rank] = collection.collections[rank].iterator
167 rank -= 1
168 end
169 end
170 end
171
172 # A view of some combinations over a base collections.
173 #
174 # This view *generates* some combinations and permutations on a collection.
175 #
176 # By default, the generated sequences are combinations:
177 #
178 # * each sequence has a length of `repeat`
179 # * elements are in sorted order (see `are_sorted` for details)
180 # * no repeated element (see `are_unique` for details)
181 #
182 # ~~~~
183 # var xs = [1, 2, 3]
184 # var cxs = new CombinationCollection[Int](xs, 2)
185 # assert cxs.length == 3
186 # assert cxs.to_a == [[1,2], [1,3], [2,3]]
187 # ~~~~
188 #
189 # Other kind of combinations can be generated by tweaking the attributes `are_sorted` and `are_unique`.
190 #
191 # * for permutation:
192 #
193 # ~~~~
194 # cxs.are_sorted = false
195 # cxs.are_unique = true
196 # assert cxs.length == 6
197 # assert cxs.to_a == [[1,2], [1,3], [2,1], [2,3], [3,1], [3,2]]
198 # ~~~~
199 #
200 # * for combinations with replacement:
201 #
202 # ~~~~
203 # cxs.are_sorted = true
204 # cxs.are_unique = false
205 # assert cxs.length == 6
206 # assert cxs.to_a == [[1,1], [1,2], [1,3], [2,2], [2,3], [3,3]]
207 # ~~~~
208 #
209 # * for product:
210 #
211 # ~~~~
212 # cxs.are_sorted = false
213 # cxs.are_unique = false
214 # assert cxs.length == 9
215 # assert cxs.to_a == [[1,1], [1,2], [1,3], [2,1], [2,2], [2,3], [3,1], [3,2], [3,3]]
216 # ~~~~
217 #
218 # However, in the last case, a faster alternative is to use `CartesianCollection`:
219 #
220 # ~~~~
221 # var cp = new CartesianCollection[Int]([xs] * 2)
222 # assert cp.to_a == cxs.to_a
223 # ~~~~
224 #
225 # As seen in the examples, the patterns of the generated sequences produce a lexicographical order.
226 #
227 # Because it is a generator, it is memory-efficient and the sequences are created only when needed.
228 #
229 # Note: because it is a view, changes on the base collection are reflected on the view.
230 #
231 # ~~~~
232 # assert xs.pop == 3
233 # cxs.are_sorted = true
234 # cxs.are_unique = true
235 # assert cxs.to_a == [[1,2]]
236 # ~~~~
237 class CombinationCollection[E]
238 super Collection[SequenceRead[E]]
239
240 # The base collection used to generate the sequences.
241 var collection: Collection[E]
242
243 # The maximum length of each generated sequence.
244 var repeat: Int
245
246 init
247 do
248 assert repeat >= 0
249 end
250
251 # Are the elements in the generated sequences sorted?
252 # Default `true`.
253 #
254 # When `true`, the original order is preserved.
255 #
256 # Elements are compared by their order in the base collection,
257 # not by their intrinsic value or comparability.
258 #
259 # ~~~~
260 # var xs = [1, 1, 2]
261 # var cxs = new CombinationCollection[Int](xs, 2)
262 # cxs.are_sorted = true
263 # assert cxs.to_a == [[1,1], [1,2], [1, 2]]
264 # cxs.are_sorted = false
265 # assert cxs.to_a == [[1,1], [1,2], [1, 1], [1, 2], [2, 1], [2, 1]]
266 # ~~~~
267 var are_sorted = true is writable
268
269 # Are the element in the generated sequence unique?
270 # Default `true`.
271 #
272 # When `true`, an element cannot be reused in the same sequence (no replacement).
273 #
274 # Elements are distinguished by their order in the base collection,
275 # not by their intrinsic value or equality.
276 #
277 # ~~~~
278 # var xs = [1, 1, 2]
279 # var cxs = new CombinationCollection[Int](xs, 2)
280 # cxs.are_unique = true
281 # assert cxs.to_a == [[1,1], [1,2], [1, 2]]
282 # cxs.are_unique = false
283 # assert cxs.to_a == [[1,1], [1,1], [1,2], [1,1], [1,2], [2,2]]
284 # ~~~~
285 var are_unique = true is writable
286
287 redef fun length
288 do
289 var n = collection.length
290 if are_unique then
291 if repeat > n then
292 return 0
293 end
294 if are_sorted then
295 return n.factorial / repeat.factorial
296 else
297 return n.factorial / (n-repeat).factorial
298 end
299 else
300 if are_sorted then
301 return (n+repeat-1).factorial / repeat.factorial / (n-1).factorial
302 else
303 return n ** repeat
304 end
305 end
306 end
307
308 redef fun iterator do
309 return new CombinationIterator[E](self)
310 end
311 end
312
313 private class CombinationIterator[E]
314 super Iterator[SequenceRead[E]]
315 var product: CombinationCollection[E]
316
317 var iterators = new Array[Iterator[E]]
318 var indices = new Array[Int]
319
320 var are_sorted: Bool is noinit
321 var are_unique: Bool is noinit
322
323 init
324 do
325 are_sorted = product.are_sorted
326 are_unique = product.are_unique
327
328 for rank in [0..product.repeat[ do
329 reset_iterator(rank)
330 end
331 end
332
333 redef var is_ok = true
334
335 redef fun item
336 do
337 var len = product.repeat
338 var res = new Array[E].with_capacity(len)
339 for i in [0..len[ do
340 var it = iterators[i]
341 res.add(it.item)
342 end
343 return res
344 end
345
346 redef fun next
347 do
348 var rank = product.repeat - 1
349
350 loop
351 var it = iterators[rank]
352
353 if are_unique and not are_sorted then
354 var idx = indices[rank] + 1
355 it.next
356 var adv = next_free(rank, idx)
357 for i in [idx..adv[ do it.next
358 indices[rank] = adv
359 else
360 it.next
361 indices[rank] += 1
362 end
363
364 if it.is_ok then break
365 if rank == 0 then
366 is_ok = false
367 return
368 end
369 rank -= 1
370 end
371
372 for r in [rank+1..product.repeat[ do
373 reset_iterator(r)
374 end
375 end
376
377 fun next_free(rank: Int, start: Int): Int
378 do
379 loop
380 for i in [0..rank[ do
381 if indices[i] == start then
382 start += 1
383 continue label
384 end
385 end
386 break label
387 end label
388 return start
389 end
390
391 fun reset_iterator(rank: Int): Iterator[E]
392 do
393 var it = product.collection.iterator
394 iterators[rank] = it
395 var skip = 0
396
397 if (not are_sorted and not are_unique) or rank == 0 then
398 # DO NOTHING
399 else if are_sorted and are_unique then
400 skip = indices[rank-1] + 1
401 else if are_sorted then
402 skip = indices[rank-1]
403 else
404 skip = next_free(rank, 0)
405 end
406
407 for i in [0..skip[ do it.next
408 indices[rank] = skip
409 if not it.is_ok then is_ok = false
410 return it
411 end
412
413 fun need_skip: Bool
414 do
415 if not are_sorted and not are_unique then
416 return false
417 else if are_sorted and are_unique then
418 var max = -1
419 for i in indices do
420 if i <= max then return true
421 max = i
422 end
423 return false
424 else if are_sorted then
425 var max = -1
426 for i in indices do
427 if i < max then return true
428 max = i
429 end
430 return false
431 else
432 # are_unique
433 for i in indices do
434 if indices.count(i) > 1 then return true
435 end
436 return false
437 end
438 end
439 end