README: document nit_env.sh
[nit.git] / lib / pipeline.nit
index 81d68c5..d3428f0 100644 (file)
 
 # Pipelined filters and operations on iterators.
 #
-# This module enhance `Collection`s with some methods that enable a
-# pipeline-like programing that offers the manupulation of
-# collections trough connected filters with reasonable memory constraints.
-#
-# The filters methods return a view bounded on the original collection.
-# Filters can be chained to build complex virtual collections.
-#
-# FIXME: Fix vocabulary (lazy/view/filter)
-# FIXME: Maybe the name of the method should indicate that the method is 'lazy' (or is a filter).
+# This module enhances `Iterator` with some methods that enable a pipeline-like programing.
+# The processing of elements in a pipeline is done trough connected filters that are implemented with reasonable memory constraints.
 module pipeline
 
-redef interface Collection[E]
-       # Filter: sort with ComparableSorter.
+redef interface Iterator[E]
+       # Filter: sort with `default_comparator`.
        # SEE: `sort_with` for details
        # REQUIRE: self isa Iterator[Comparable]
        #
-       #     assert [1,3,2].sort_filter.to_a        ==  [1,2,3]
-       fun sort_filter: Collection[E]
+       #     assert [1,3,2].iterator.sort.to_a      ==  [1,2,3]
+       fun sort: Iterator[E]
        do
-               assert self isa Collection[Comparable]
-               var sorter = new ComparableSorter[Comparable]
+               assert self isa Iterator[Comparable]
                var a = self.to_a
-               sorter.sort(a)
-               return a
+               default_comparator.sort(a)
+               return a.iterator
        end
 
-       # Filter: sort with a given `sorter`.
+       # Filter: sort with a given `comparator`.
        # Important: require O(n) memory.
-       fun sort_with(sorter: AbstractSorter[E]): Collection[E]
+       #
+       #     assert ["a", "c", "b"].iterator.sort_with(alpha_comparator).to_a  == ["a", "b", "c"]
+       fun sort_with(comparator: Comparator): Iterator[E]
        do
                var a = self.to_a
-               sorter.sort(a)
-               return a
+               comparator.sort(a)
+               return a.iterator
        end
 
        # Filter: reject duplicates.
@@ -55,12 +49,12 @@ redef interface Collection[E]
        # Important: rely on `==` and `hash`
        # Important: require O(m) in memory, where m is the total number of uniq items.
        #
-       #     assert [1,2,1,1,1,3,2].uniq.to_a       ==  [1,2,3]
+       #     assert [1,2,1,1,1,3,2].iterator.uniq.to_a      ==  [1,2,3]
        #
        # REQUIRE: self isa Iterator[Object]
-       fun uniq: Collection[E]
+       fun uniq: Iterator[E]
        do
-               assert self isa Collection[Object]
+               assert self isa Iterator[Object]
                return new PipeUniq[E](self)
        end
 
@@ -68,84 +62,185 @@ redef interface Collection[E]
        #
        # Important: rely on `==`.
        #
-       #     assert [1,2,1,1,1,3,2].seq_uniq.to_a           ==  [1,2,1,3,2]
-       fun seq_uniq: Collection[E]
+       #     assert [1,2,1,1,1,3,2].iterator.seq_uniq.to_a          ==  [1,2,1,3,2]
+       fun seq_uniq: Iterator[E]
        do
                return new PipeSeqUniq[E](self)
        end
 
-       # The view of two combined collections.
+       # Combine two iterators.
+       #
+       # When the first iterator is terminated, the second is started.
        #
-       #     assert ([1..20[ + [20..40[).to_a       ==  ([1..40[).to_a
-       fun +(other: Collection[E]): Collection[E]
+       #     assert ([1..20[.iterator + [20..40[.iterator).to_a             ==  ([1..40[).to_a
+       fun +(other: Iterator[E]): Iterator[E]
        do
                return new PipeJoin[E](self, other)
        end
 
        # Alternate each item with `e`.
        #
-       #    assert [1,2,3].alternate(0).to_a                ==  [1,0,2,0,3]
-       fun alternate(e: E): Collection[E]
+       #     assert [1,2,3].iterator.alternate(0).to_a              ==  [1,0,2,0,3]
+       fun alternate(e: E): Iterator[E]
        do
                return new PipeAlternate[E](self, e)
        end
 
        # Filter: reject a given `item`.
        #
-       #    assert [1,1,2,1,3].skip(1).to_a                 ==  [2,3]
-       fun skip(item: E): Collection[E]
+       #     assert [1,1,2,1,3].iterator.skip(1).to_a               ==  [2,3]
+       fun skip(item: E): Iterator[E]
        do
                return new PipeSkip[E](self, item)
        end
 
        # Filter: keep only the first `length` items.
        #
-       #     assert [1,2,3,4,5].head(2).to_a        ==  [1,2]
-       fun head(length: Int): Collection[E]
+       # This filter does not always consume `self'.
+       #
+       #     var i = [1,2,3,4,5].iterator
+       #     assert i.head(2).to_a   == [1,2]
+       #     assert i.to_a           == [3,4,5]
+       fun head(length: Int): Iterator[E]
        do
                return new PipeHead[E](self, length)
        end
 
        # Filter: reject the first `length` items.
        #
-       #     assert [1,2,3,4,5].skip_head(2).to_a           ==  [3,4,5]
+       #     assert [1,2,3,4,5].iterator.skip_head(2).to_a          ==  [3,4,5]
        #
        # ENSURE: self == return
-       fun skip_head(length: Int): Collection[E]
+       fun skip_head(length: Int): Iterator[E]
        do
-               return new PipeSkipHead[E](self, length)
+               while length > 0 and self.is_ok do
+                       length -= 1
+                       self.next
+               end
+               return self
        end
 
        # Filter: keep only the last `length` items.
        #
-       #     assert [1,2,3,4,5].tail(2).to_a        ==  [4,5]
+       #     assert [1,2,3,4,5].iterator.tail(2).to_a       ==  [4,5]
        #
        # Important: require O(length) in memory
-       fun tail(length: Int): Collection[E]
+       fun tail(length: Int): Iterator[E]
        do
-               return new PipeTail[E](self, length)
+               var lasts = new List[E]
+               while self.is_ok do
+                       while lasts.length >= length do lasts.shift
+                       lasts.push(self.item)
+                       self.next
+               end
+               return lasts.iterator
        end
 
        # Filter: reject the last `length` items.
        #
-       #     assert [1,2,3,4,5].skip_tail(2).to_a           ==  [1,2,3]
+       #     assert [1,2,3,4,5].iterator.skip_tail(2).to_a          ==  [1,2,3]
        #
        # Important: require O(length) in memory
-       fun skip_tail(length: Int): Collection[E]
+       fun skip_tail(length: Int): Iterator[E]
        do
                return new PipeSkipTail[E](self, length)
        end
+
+       # Filter: reject items that does not meet some criteria.
+       #
+       #     class IsEvenFunction
+       #       super Function[Int, Bool]
+       #       redef fun apply(i) do return i % 2 == 0
+       #     end
+       #     assert [1,2,3,4,8].iterator.select(new IsEvenFunction).to_a  == [2,4,8]
+       fun select(predicate: Function[E, Bool]): Iterator[E]
+       do
+               return new PipeSelect[E](self, predicate)
+       end
 end
 
-### Specific private collection and iterator classes
+# Wraps an iterator to skip nulls.
+#
+# ~~~nit
+# var i: Iterator[Int]
+#
+# i = new NullSkipper[Int]([null, 1, null, 2, null: nullable Int].iterator)
+# assert i.to_a == [1, 2]
+#
+# i = new NullSkipper[Int]([1, null, 2, 3: nullable Int].iterator)
+# assert i.to_a == [1, 2, 3]
+# ~~~
+class NullSkipper[E: Object]
+       super Iterator[E]
 
-private class PipeUniq[E]
-       super NaiveCollection[E]
-       var source: Collection[E]
-       redef fun iterator do return new PipeUniqIterator[E](source.iterator)
+       # The inner iterator.
+       var inner: Iterator[nullable E]
+
+       redef fun finish do inner.finish
+
+       redef fun is_ok do
+               skip_nulls
+               return inner.is_ok
+       end
+
+       redef fun item do
+               skip_nulls
+               return inner.item.as(E)
+       end
+
+       redef fun next do
+               inner.next
+               skip_nulls
+       end
+
+       private fun skip_nulls do
+               while inner.is_ok and inner.item == null do inner.next
+       end
+end
+
+# Interface that reify a function.
+# Concrete subclasses must implements the `apply` method.
+#
+# This interface helps to manipulate function-like objects.
+#
+# The main usage it as a transformation; that takes an argument and produce a result.
+# See `map` for example.
+#
+# Another usage is as a predicate, with `Function[E, Bool]`.
+# See `Iterator::select` for example.
+#
+# Function with more than one argument can be reified with some uncurification.
+# Eg. `Function[ARG1, Function[ARG2, RES]]`.
+#
+# NOTE: Nit is not a functionnal language, this class is a very basic way to
+# simulate the reification of a simple function.
+interface Function[FROM, TO]
+       # How an element is mapped to another one.
+       fun apply(e: FROM): TO is abstract
+
+       # Filter: produce an iterator which each element is transformed.
+       #
+       #     var i = [1,2,3].iterator
+       #     assert fun_to_s.map(i).to_a  == ["1", "2", "3"]
+       #
+       # Note: because there is no generic method in Nit (yet?),
+       # there is no way to have a better API.
+       # eg. with the Iterator as receiver and the function as argument.
+       # (see `Iterator::select`)
+       fun map(i: Iterator[FROM]): Iterator[TO]
+       do
+               return new PipeMap[FROM, TO](i, self)
+       end
+end
+
+private class FunctionToS
+       super Function[Object, String]
+       redef fun apply(e) do return e.to_s
 end
 
-private class PipeUniqIterator[E]
+### Specific private iterator classes
+
+private class PipeUniq[E]
        super Iterator[E]
 
        var source: Iterator[E]
@@ -167,12 +262,6 @@ private class PipeUniqIterator[E]
 end
 
 private class PipeSeqUniq[E]
-       super NaiveCollection[E]
-       var source: Collection[E]
-       redef fun iterator do return new PipeSeqUniqIterator[E](source.iterator)
-end
-
-private class PipeSeqUniqIterator[E]
        super Iterator[E]
 
        var source: Iterator[E]
@@ -192,13 +281,6 @@ private class PipeSeqUniqIterator[E]
 end
 
 private class PipeJoin[E]
-       super NaiveCollection[E]
-       var source1: Collection[E]
-       var source2: Collection[E]
-       redef fun iterator do return new PipeJoinIterator[E](source1.iterator, source2.iterator)
-end
-
-private class PipeJoinIterator[E]
        super Iterator[E]
        var source1: Iterator[E]
        var source2: Iterator[E]
@@ -220,13 +302,6 @@ private class PipeJoinIterator[E]
 end
 
 private class PipeAlternate[E]
-       super NaiveCollection[E]
-       var source: Collection[E]
-       var odd_item: E
-       redef fun iterator do return new PipeAlternateIterator[E](source.iterator, odd_item)
-end
-
-private class PipeAlternateIterator[E]
        super Iterator[E]
 
        var source: Iterator[E]
@@ -254,26 +329,12 @@ private class PipeAlternateIterator[E]
 end
 
 private class PipeSkip[E]
-       super NaiveCollection[E]
-       var source: Collection[E]
-       var skip_item: E
-       redef fun iterator do return new PipeSkipIterator[E](source.iterator, skip_item)
-end
-
-private class PipeSkipIterator[E]
-       super NaiveCollection[E]
        super Iterator[E]
 
        var source: Iterator[E]
        var skip_item: E
 
-       init(source: Iterator[E], skip_item: E)
-       do
-               self.source = source
-               self.skip_item = skip_item
-
-               do_skip
-       end
+       init do do_skip
 
        fun do_skip
        do
@@ -292,13 +353,6 @@ private class PipeSkipIterator[E]
 end
 
 private class PipeHead[E]
-       super NaiveCollection[E]
-       var source: Collection[E]
-       var pipe_length: Int
-       redef fun iterator do return new PipeHeadIterator[E](source.iterator, pipe_length)
-end
-
-private class PipeHeadIterator[E]
        super Iterator[E]
 
        var source: Iterator[E]
@@ -316,48 +370,7 @@ private class PipeHeadIterator[E]
        end
 end
 
-private class PipeSkipHead[E]
-       super NaiveCollection[E]
-       var source: Collection[E]
-       var pipe_length: Int
-       redef fun iterator
-       do
-               var res = source.iterator
-               var length = pipe_length
-               while length > 0 and res.is_ok do
-                       length -= 1
-                       res.next
-               end
-               return res
-       end
-end
-
-private class PipeTail[E]
-       super NaiveCollection[E]
-       var source: Collection[E]
-       var pipe_length: Int
-       redef fun iterator
-       do
-               var lasts = new List[E]
-               var res = source.iterator
-               var length = pipe_length
-               while res.is_ok do
-                       while lasts.length >= length do lasts.shift
-                       lasts.push(res.item)
-                       res.next
-               end
-               return lasts.iterator
-       end
-end
-
 private class PipeSkipTail[E]
-       super NaiveCollection[E]
-       var source: Collection[E]
-       var pipe_length: Int
-       redef fun iterator do return new PipeSkipTailIterator[E](source.iterator, pipe_length)
-end
-
-private class PipeSkipTailIterator[E]
        super Iterator[E]
 
        var source: Iterator[E]
@@ -366,10 +379,8 @@ private class PipeSkipTailIterator[E]
 
        var lasts = new List[E]
 
-       init(source: Iterator[E], length: Int)
+       init
        do
-               self.source = source
-               self.length = length
                var lasts = self.lasts
                while source.is_ok and lasts.length < length do
                        lasts.push(source.item)
@@ -388,3 +399,57 @@ private class PipeSkipTailIterator[E]
                source.next
        end
 end
+
+private class PipeSelect[E]
+       super Iterator[E]
+
+       var source: Iterator[E]
+
+       var predicate: Function[E, Bool]
+
+       init do do_skip
+
+       fun do_skip
+       do
+               while source.is_ok and not predicate.apply(source.item) do source.next
+       end
+
+       redef fun is_ok do return source.is_ok
+
+       redef fun item do return source.item
+
+       redef fun next
+       do
+               source.next
+               do_skip
+       end
+end
+
+private class PipeMap[E, F]
+       super Iterator[F]
+
+       var source: Iterator[E]
+       var function: Function[E, F]
+
+       var item_cache: nullable F = null
+       var item_cached = false
+
+       redef fun is_ok do return source.is_ok
+
+       redef fun item do
+               if item_cached then return item_cache
+               item_cache = function.apply(source.item)
+               item_cached = true
+               return item_cache
+       end
+
+       redef fun next do
+               source.next
+               item_cached = false
+       end
+end
+
+# Stateless singleton that reify to the `to_s` method.
+#
+#     assert fun_to_s.apply(5)  == "5"
+fun fun_to_s: Function[Object, String] do return once new FunctionToS