examples: annotate examples
[nit.git] / lib / pipeline.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Pipelined filters and operations on iterators.
16 #
17 # This module enhances `Iterator` with some methods that enable a pipeline-like programing.
18 # The processing of elements in a pipeline is done trough connected filters that are implemented with reasonable memory constraints.
19 module pipeline
20
21 redef interface Iterator[E]
22 # Filter: sort with `default_comparator`.
23 # SEE: `sort_with` for details
24 # REQUIRE: self isa Iterator[Comparable]
25 #
26 # assert [1,3,2].iterator.sort.to_a == [1,2,3]
27 fun sort: Iterator[E]
28 do
29 assert self isa Iterator[Comparable]
30 var a = self.to_a
31 default_comparator.sort(a)
32 return a.iterator
33 end
34
35 # Filter: sort with a given `comparator`.
36 # Important: require O(n) memory.
37 #
38 # assert ["a", "c", "b"].iterator.sort_with(alpha_comparator).to_a == ["a", "b", "c"]
39 fun sort_with(comparator: Comparator): Iterator[E]
40 do
41 var a = self.to_a
42 comparator.sort(a)
43 return a.iterator
44 end
45
46 # Filter: reject duplicates.
47 # Elements already seen are rejected.
48 #
49 # Important: rely on `==` and `hash`
50 # Important: require O(m) in memory, where m is the total number of uniq items.
51 #
52 # assert [1,2,1,1,1,3,2].iterator.uniq.to_a == [1,2,3]
53 #
54 # REQUIRE: self isa Iterator[Object]
55 fun uniq: Iterator[E]
56 do
57 assert self isa Iterator[Object]
58 return new PipeUniq[E](self)
59 end
60
61 # Filter: reject continuous sequences of duplicates
62 #
63 # Important: rely on `==`.
64 #
65 # assert [1,2,1,1,1,3,2].iterator.seq_uniq.to_a == [1,2,1,3,2]
66 fun seq_uniq: Iterator[E]
67 do
68 return new PipeSeqUniq[E](self)
69 end
70
71 # Combine two iterators.
72 #
73 # When the first iterator is terminated, the second is started.
74 #
75 # assert ([1..20[.iterator + [20..40[.iterator).to_a == ([1..40[).to_a
76 #
77 # SEE: `Iterator2`
78 fun +(other: Iterator[E]): Iterator[E]
79 do
80 return new PipeJoin[E](self, other)
81 end
82
83 # Alternate each item with `e`.
84 #
85 # assert [1,2,3].iterator.alternate(0).to_a == [1,0,2,0,3]
86 fun alternate(e: E): Iterator[E]
87 do
88 return new PipeAlternate[E](self, e)
89 end
90
91 # Filter: reject a given `item`.
92 #
93 # assert [1,1,2,1,3].iterator.skip(1).to_a == [2,3]
94 fun skip(item: E): Iterator[E]
95 do
96 return new PipeSkip[E](self, item)
97 end
98
99 # Filter: keep only the first `length` items.
100 #
101 # This filter does not always consume `self'.
102 #
103 # var i = [1,2,3,4,5].iterator
104 # assert i.head(2).to_a == [1,2]
105 # assert i.to_a == [3,4,5]
106 fun head(length: Int): Iterator[E]
107 do
108 return new PipeHead[E](self, length)
109 end
110
111 # Filter: reject the first `length` items.
112 #
113 # assert [1,2,3,4,5].iterator.skip_head(2).to_a == [3,4,5]
114 #
115 # ENSURE: self == return
116 fun skip_head(length: Int): Iterator[E]
117 do
118 while length > 0 and self.is_ok do
119 length -= 1
120 self.next
121 end
122 return self
123 end
124
125 # Filter: keep only the last `length` items.
126 #
127 # assert [1,2,3,4,5].iterator.tail(2).to_a == [4,5]
128 #
129 # Important: require O(length) in memory
130 fun tail(length: Int): Iterator[E]
131 do
132 var lasts = new List[E]
133 while self.is_ok do
134 while lasts.length >= length do lasts.shift
135 lasts.push(self.item)
136 self.next
137 end
138 return lasts.iterator
139 end
140
141 # Filter: reject the last `length` items.
142 #
143 # assert [1,2,3,4,5].iterator.skip_tail(2).to_a == [1,2,3]
144 #
145 # Important: require O(length) in memory
146 fun skip_tail(length: Int): Iterator[E]
147 do
148 return new PipeSkipTail[E](self, length)
149 end
150
151 # Filter: reject items that does not meet some criteria.
152 #
153 # class IsEvenFunction
154 # super Function[Int, Bool]
155 # redef fun apply(i) do return i % 2 == 0
156 # end
157 # assert [1,2,3,4,8].iterator.select(new IsEvenFunction).to_a == [2,4,8]
158 fun select(predicate: Function[E, Bool]): Iterator[E]
159 do
160 return new PipeSelect[E](self, predicate)
161 end
162 end
163
164 # Concatenates a sequence of iterators.
165 #
166 # Wraps an iterator of sub-iterators and iterates over the elements of the
167 # sub-iterators.
168 #
169 # ~~~nit
170 # var i: Iterator[Int]
171 # var empty = new Array[Int]
172 #
173 # i = new Iterator2[Int]([
174 # [1, 2, 3].iterator,
175 # empty.iterator,
176 # [4, 5].iterator
177 # ].iterator)
178 # assert i.to_a == [1, 2, 3, 4, 5]
179 #
180 # i = new Iterator2[Int]([
181 # empty.iterator,
182 # [42].iterator,
183 # empty.iterator
184 # ].iterator)
185 # assert i.to_a == [42]
186 # ~~~
187 #
188 # SEE: `Iterator::+`
189 class Iterator2[E]
190 super Iterator[E]
191
192 # The inner iterator over sub-iterators.
193 var inner: Iterator[Iterator[E]]
194
195 redef fun finish
196 do
197 var i = current_iterator
198 if i != null then i.finish
199 end
200
201 redef fun is_ok
202 do
203 var i = current_iterator
204 if i == null then return false
205 return i.is_ok
206 end
207
208 redef fun item
209 do
210 var i = current_iterator
211 assert i != null
212 return i.item
213 end
214
215 redef fun next
216 do
217 var i = current_iterator
218 assert i != null
219 i.next
220 end
221
222 redef fun start
223 do
224 var i = current_iterator
225 if i != null then i.start
226 end
227
228 private var previous_iterator: nullable Iterator[E] = null
229
230 private fun current_iterator: nullable Iterator[E]
231 do
232 if previous_iterator == null then
233 # Get the first sub-iterator.
234 if inner.is_ok then
235 previous_iterator = inner.item
236 previous_iterator.start
237 inner.next
238 else
239 return null
240 end
241 end
242 # Get the first sub-iterator that has a current item.
243 while inner.is_ok and not previous_iterator.is_ok do
244 previous_iterator.finish
245 previous_iterator = inner.item
246 previous_iterator.start
247 inner.next
248 end
249 return previous_iterator
250 end
251 end
252
253 # Wraps an iterator to skip nulls.
254 #
255 # ~~~nit
256 # var i: Iterator[Int]
257 #
258 # i = new NullSkipper[Int]([null, 1, null, 2, null: nullable Int].iterator)
259 # assert i.to_a == [1, 2]
260 #
261 # i = new NullSkipper[Int]([1, null, 2, 3: nullable Int].iterator)
262 # assert i.to_a == [1, 2, 3]
263 # ~~~
264 class NullSkipper[E: Object]
265 super Iterator[E]
266
267 # The inner iterator.
268 var inner: Iterator[nullable E]
269
270 redef fun finish do inner.finish
271
272 redef fun is_ok do
273 skip_nulls
274 return inner.is_ok
275 end
276
277 redef fun item do
278 skip_nulls
279 return inner.item.as(E)
280 end
281
282 redef fun next do
283 inner.next
284 skip_nulls
285 end
286
287 private fun skip_nulls do
288 while inner.is_ok and inner.item == null do inner.next
289 end
290 end
291
292 # Interface that reify a function.
293 # Concrete subclasses must implements the `apply` method.
294 #
295 # This interface helps to manipulate function-like objects.
296 #
297 # The main usage it as a transformation; that takes an argument and produce a result.
298 # See `map` for example.
299 #
300 # Another usage is as a predicate, with `Function[E, Bool]`.
301 # See `Iterator::select` for example.
302 #
303 # Function with more than one argument can be reified with some uncurification.
304 # Eg. `Function[ARG1, Function[ARG2, RES]]`.
305 #
306 # NOTE: Nit is not a functionnal language, this class is a very basic way to
307 # simulate the reification of a simple function.
308 interface Function[FROM, TO]
309 # How an element is mapped to another one.
310 fun apply(e: FROM): TO is abstract
311
312 # Filter: produce an iterator which each element is transformed.
313 #
314 # var i = [1,2,3].iterator
315 # assert fun_to_s.map(i).to_a == ["1", "2", "3"]
316 #
317 # Note: because there is no generic method in Nit (yet?),
318 # there is no way to have a better API.
319 # eg. with the Iterator as receiver and the function as argument.
320 # (see `Iterator::select`)
321 fun map(i: Iterator[FROM]): Iterator[TO]
322 do
323 return new PipeMap[FROM, TO](i, self)
324 end
325 end
326
327 private class FunctionToS
328 super Function[Object, String]
329 redef fun apply(e) do return e.to_s
330 end
331
332 ### Specific private iterator classes
333
334 private class PipeUniq[E]
335 super Iterator[E]
336
337 var source: Iterator[E]
338
339 var seen = new HashSet[Object] # FIXME HashSet[E]
340
341 redef fun is_ok do return source.is_ok
342
343 redef fun item do return source.item
344
345 redef fun next
346 do
347 self.seen.add(self.item.as(Object))
348 source.next
349 while source.is_ok and self.seen.has(source.item.as(Object)) do
350 source.next
351 end
352 end
353 end
354
355 private class PipeSeqUniq[E]
356 super Iterator[E]
357
358 var source: Iterator[E]
359
360 redef fun is_ok do return source.is_ok
361
362 redef fun item do return source.item
363
364 redef fun next
365 do
366 var seen = self.item
367 source.next
368 while source.is_ok and seen == source.item do
369 source.next
370 end
371 end
372 end
373
374 private class PipeJoin[E]
375 super Iterator[E]
376 var source1: Iterator[E]
377 var source2: Iterator[E]
378
379 redef fun is_ok
380 do
381 return source1.is_ok or source2.is_ok
382 end
383
384 redef fun item
385 do
386 if source1.is_ok then return source1.item else return source2.item
387 end
388
389 redef fun next
390 do
391 if source1.is_ok then source1.next else source2.next
392 end
393 end
394
395 private class PipeAlternate[E]
396 super Iterator[E]
397
398 var source: Iterator[E]
399 var odd_item: E
400 var odd = true
401
402 redef fun is_ok do return source.is_ok
403
404 redef fun item
405 do
406 if odd then
407 return source.item
408 else
409 return odd_item
410 end
411 end
412
413 redef fun next
414 do
415 if odd then
416 source.next
417 end
418 odd = not odd
419 end
420 end
421
422 private class PipeSkip[E]
423 super Iterator[E]
424
425 var source: Iterator[E]
426 var skip_item: E
427
428 init do do_skip
429
430 fun do_skip
431 do
432 while source.is_ok and source.item == skip_item do source.next
433 end
434
435 redef fun is_ok do return source.is_ok
436
437 redef fun item do return source.item
438
439 redef fun next
440 do
441 source.next
442 do_skip
443 end
444 end
445
446 private class PipeHead[E]
447 super Iterator[E]
448
449 var source: Iterator[E]
450
451 var length: Int
452
453 redef fun is_ok do return length > 0 and source.is_ok
454
455 redef fun item do return source.item
456
457 redef fun next
458 do
459 length -= 1
460 source.next
461 end
462 end
463
464 private class PipeSkipTail[E]
465 super Iterator[E]
466
467 var source: Iterator[E]
468
469 var length: Int
470
471 var lasts = new List[E]
472
473 init
474 do
475 var lasts = self.lasts
476 while source.is_ok and lasts.length < length do
477 lasts.push(source.item)
478 source.next
479 end
480 end
481
482 redef fun is_ok do return source.is_ok
483
484 redef fun item do return lasts.first
485
486 redef fun next
487 do
488 lasts.shift
489 lasts.push(source.item)
490 source.next
491 end
492 end
493
494 private class PipeSelect[E]
495 super Iterator[E]
496
497 var source: Iterator[E]
498
499 var predicate: Function[E, Bool]
500
501 init do do_skip
502
503 fun do_skip
504 do
505 while source.is_ok and not predicate.apply(source.item) do source.next
506 end
507
508 redef fun is_ok do return source.is_ok
509
510 redef fun item do return source.item
511
512 redef fun next
513 do
514 source.next
515 do_skip
516 end
517 end
518
519 private class PipeMap[E, F]
520 super Iterator[F]
521
522 var source: Iterator[E]
523 var function: Function[E, F]
524
525 var item_cache: nullable F = null
526 var item_cached = false
527
528 redef fun is_ok do return source.is_ok
529
530 redef fun item do
531 if item_cached then return item_cache
532 item_cache = function.apply(source.item)
533 item_cached = true
534 return item_cache
535 end
536
537 redef fun next do
538 source.next
539 item_cached = false
540 end
541 end
542
543 # Stateless singleton that reify to the `to_s` method.
544 #
545 # assert fun_to_s.apply(5) == "5"
546 fun fun_to_s: Function[Object, String] do return once new FunctionToS