X-Git-Url: http://nitlanguage.org diff --git a/lib/pipeline.nit b/lib/pipeline.nit index b95220b..0ca08b2 100644 --- a/lib/pipeline.nit +++ b/lib/pipeline.nit @@ -14,43 +14,33 @@ # Pipelined filters and operations on iterators. # -# This module enhance `Collection`s with some methods that enable a -# pipeline-like programing that offers the manupulation of -# collections trough connected filters with reasonable memory constraints. -# -# The filters methods return a view bounded on the original collection. -# Filters can be chained to build complex virtual collections. -# -# FIXME: Fix vocabulary (lazy/view/filter) -# FIXME: Maybe the name of the method should indicate that the method is 'lazy' (or is a filter). +# This module enhances `Iterator` with some methods that enable a pipeline-like programing. +# The processing of elements in a pipeline is done trough connected filters that are implemented with reasonable memory constraints. module pipeline -redef interface Collection[E] - # Filter: sort with ComparableSorter. +redef interface Iterator[E] + # Filter: sort with `default_comparator`. # SEE: `sort_with` for details # REQUIRE: self isa Iterator[Comparable] # - # [1,3,2].sort_filter #=> [1,2,3] - fun sort_filter: Collection[E] + # assert [1,3,2].iterator.sort.to_a == [1,2,3] + fun sort: Iterator[E] do - assert self isa Collection[Comparable] - var sorter = new ComparableSorter[Comparable] + assert self isa Iterator[Comparable] var a = self.to_a - sorter.sort(a) - return a + default_comparator.sort(a) + return a.iterator end - # Filter: sort with a given `sorter`. + # Filter: sort with a given `comparator`. # Important: require O(n) memory. # - # REQUIRE: self isa Iterator[Object] - # FIXME: AbstractSorter[E] is refused - fun sort_with(sorter: AbstractSorter[Object]): Collection[E] + # assert ["a", "c", "b"].iterator.sort_with(alpha_comparator).to_a == ["a", "b", "c"] + fun sort_with(comparator: Comparator): Iterator[E] do - assert self isa Collection[Object] var a = self.to_a - sorter.sort(a) - return a + comparator.sort(a) + return a.iterator end # Filter: reject duplicates. @@ -59,12 +49,12 @@ redef interface Collection[E] # Important: rely on `==` and `hash` # Important: require O(m) in memory, where m is the total number of uniq items. # - # [1,2,1,1,1,3,2].uniq #=> [1,2,3] + # assert [1,2,1,1,1,3,2].iterator.uniq.to_a == [1,2,3] # # REQUIRE: self isa Iterator[Object] - fun uniq: Collection[E] + fun uniq: Iterator[E] do - assert self isa Collection[Object] + assert self isa Iterator[Object] return new PipeUniq[E](self) end @@ -72,84 +62,185 @@ redef interface Collection[E] # # Important: rely on `==`. # - # [1,2,1,1,1,3,2].seq_uniq #=> [1,2,1,3,2] - fun seq_uniq: Collection[E] + # assert [1,2,1,1,1,3,2].iterator.seq_uniq.to_a == [1,2,1,3,2] + fun seq_uniq: Iterator[E] do return new PipeSeqUniq[E](self) end - # The view of two combined collections. + # Combine two iterators. + # + # When the first iterator is terminated, the second is started. # - # ([1..20[ + [20..40[) #=> [1..40[ - fun +(other: Collection[E]): Collection[E] + # assert ([1..20[.iterator + [20..40[.iterator).to_a == ([1..40[).to_a + fun +(other: Iterator[E]): Iterator[E] do return new PipeJoin[E](self, other) end # Alternate each item with `e`. # - # [1,2,3].alternate(0) #=> [1,0,2,0,3] - fun alternate(e: E): Collection[E] + # assert [1,2,3].iterator.alternate(0).to_a == [1,0,2,0,3] + fun alternate(e: E): Iterator[E] do return new PipeAlternate[E](self, e) end # Filter: reject a given `item`. # - # [1,1,2,1,3].skip(1) #=> [2,3] - fun skip(item: E): Collection[E] + # assert [1,1,2,1,3].iterator.skip(1).to_a == [2,3] + fun skip(item: E): Iterator[E] do return new PipeSkip[E](self, item) end # Filter: keep only the first `length` items. # - # [1,2,3,4,5].head(2) #=> [1,2] - fun head(length: Int): Collection[E] + # This filter does not always consume `self'. + # + # var i = [1,2,3,4,5].iterator + # assert i.head(2).to_a == [1,2] + # assert i.to_a == [3,4,5] + fun head(length: Int): Iterator[E] do return new PipeHead[E](self, length) end # Filter: reject the first `length` items. # - # [1,2,3,4,5].skip_head(2) #=> [3,4,5] + # assert [1,2,3,4,5].iterator.skip_head(2).to_a == [3,4,5] # # ENSURE: self == return - fun skip_head(length: Int): Collection[E] + fun skip_head(length: Int): Iterator[E] do - return new PipeSkipHead[E](self, length) + while length > 0 and self.is_ok do + length -= 1 + self.next + end + return self end # Filter: keep only the last `length` items. # - # [1,2,3,4,5].tail(2) #=> [4,5] + # assert [1,2,3,4,5].iterator.tail(2).to_a == [4,5] # # Important: require O(length) in memory - fun tail(length: Int): Collection[E] + fun tail(length: Int): Iterator[E] do - return new PipeTail[E](self, length) + var lasts = new List[E] + while self.is_ok do + while lasts.length >= length do lasts.shift + lasts.push(self.item) + self.next + end + return lasts.iterator end # Filter: reject the last `length` items. # - # [1,2,3,4,5].skip_tail(2) #=> [1,2,3] + # assert [1,2,3,4,5].iterator.skip_tail(2).to_a == [1,2,3] # # Important: require O(length) in memory - fun skip_tail(length: Int): Collection[E] + fun skip_tail(length: Int): Iterator[E] do return new PipeSkipTail[E](self, length) end + + # Filter: reject items that does not meet some criteria. + # + # class IsEvenFunction + # super Function[Int, Bool] + # redef fun apply(i) do return i % 2 == 0 + # end + # assert [1,2,3,4,8].iterator.select(new IsEvenFunction).to_a == [2,4,8] + fun select(predicate: Function[E, Bool]): Iterator[E] + do + return new PipeSelect[E](self, predicate) + end end -### Specific private collection and iterator classes +# Wraps an iterator to skip nulls. +# +# ~~~nit +# var i: Iterator[Int] +# +# i = new NullSkipper[Int]([null, 1, null, 2, null: nullable Int].iterator) +# assert i.to_a == [1, 2] +# +# i = new NullSkipper[Int]([1, null, 2, 3: nullable Int].iterator) +# assert i.to_a == [1, 2, 3] +# ~~~ +class NullSkipper[E: Object] + super Iterator[E] -private class PipeUniq[E] - super NaiveCollection[E] - var source: Collection[E] - redef fun iterator do return new PipeUniqIterator[E](source.iterator) + # The inner iterator. + var inner: Iterator[nullable E] + + redef fun finish do inner.finish + + redef fun is_ok do + skip_nulls + return inner.is_ok + end + + redef fun item do + skip_nulls + return inner.item.as(E) + end + + redef fun next do + inner.next + skip_nulls + end + + private fun skip_nulls do + while inner.is_ok and inner.item == null do inner.next + end +end + +# Interface that reify a function. +# Concrete subclasses must implements the `apply` method. +# +# This interface helps to manipulate function-like objects. +# +# The main usage it as a transformation; that takes an argument and produce a result. +# See `map` for example. +# +# Another usage is as a predicate, with `Function[E, Bool]`. +# See `Iterator::select` for example. +# +# Function with more than one argument can be reified with some uncurification. +# Eg. `Function[ARG1, Function[ARG2, RES]]`. +# +# NOTE: Nit is not a functionnal language, this class is a very basic way to +# simulate the reification of a simple function. +interface Function[FROM, TO] + # How an element is mapped to another one. + fun apply(e: FROM): TO is abstract + + # Filter: produce an iterator which each element is transformed. + # + # var i = [1,2,3].iterator + # assert fun_to_s.map(i).to_a == ["1", "2", "3"] + # + # Note: because there is no generic method in Nit (yet?), + # there is no way to have a better API. + # eg. with the Iterator as receiver and the function as argument. + # (see `Iterator::select`) + fun map(i: Iterator[FROM]): Iterator[TO] + do + return new PipeMap[FROM, TO](i, self) + end +end + +private class FunctionToS + super Function[Object, String] + redef fun apply(e) do return e.to_s end -private class PipeUniqIterator[E] +### Specific private iterator classes + +private class PipeUniq[E] super Iterator[E] var source: Iterator[E] @@ -171,12 +262,6 @@ private class PipeUniqIterator[E] end private class PipeSeqUniq[E] - super NaiveCollection[E] - var source: Collection[E] - redef fun iterator do return new PipeSeqUniqIterator[E](source.iterator) -end - -private class PipeSeqUniqIterator[E] super Iterator[E] var source: Iterator[E] @@ -196,13 +281,6 @@ private class PipeSeqUniqIterator[E] end private class PipeJoin[E] - super NaiveCollection[E] - var source1: Collection[E] - var source2: Collection[E] - redef fun iterator do return new PipeJoinIterator[E](source1.iterator, source2.iterator) -end - -private class PipeJoinIterator[E] super Iterator[E] var source1: Iterator[E] var source2: Iterator[E] @@ -224,13 +302,6 @@ private class PipeJoinIterator[E] end private class PipeAlternate[E] - super NaiveCollection[E] - var source: Collection[E] - var odd_item: E - redef fun iterator do return new PipeAlternateIterator[E](source.iterator, odd_item) -end - -private class PipeAlternateIterator[E] super Iterator[E] var source: Iterator[E] @@ -258,26 +329,12 @@ private class PipeAlternateIterator[E] end private class PipeSkip[E] - super NaiveCollection[E] - var source: Collection[E] - var skip_item: E - redef fun iterator do return new PipeSkipIterator[E](source.iterator, skip_item) -end - -private class PipeSkipIterator[E] - super NaiveCollection[E] super Iterator[E] var source: Iterator[E] var skip_item: E - init(source: Iterator[E], skip_item: E) - do - self.source = source - self.skip_item = skip_item - - do_skip - end + init do do_skip fun do_skip do @@ -296,13 +353,6 @@ private class PipeSkipIterator[E] end private class PipeHead[E] - super NaiveCollection[E] - var source: Collection[E] - var pipe_length: Int - redef fun iterator do return new PipeHeadIterator[E](source.iterator, pipe_length) -end - -private class PipeHeadIterator[E] super Iterator[E] var source: Iterator[E] @@ -320,48 +370,7 @@ private class PipeHeadIterator[E] end end -private class PipeSkipHead[E] - super NaiveCollection[E] - var source: Collection[E] - var pipe_length: Int - redef fun iterator - do - var res = source.iterator - var length = pipe_length - while length > 0 and res.is_ok do - length -= 1 - res.next - end - return res - end -end - -private class PipeTail[E] - super NaiveCollection[E] - var source: Collection[E] - var pipe_length: Int - redef fun iterator - do - var lasts = new List[E] - var res = source.iterator - var length = pipe_length - while res.is_ok do - while lasts.length >= length do lasts.shift - lasts.push(res.item) - res.next - end - return lasts.iterator - end -end - private class PipeSkipTail[E] - super NaiveCollection[E] - var source: Collection[E] - var pipe_length: Int - redef fun iterator do return new PipeSkipTailIterator[E](source.iterator, pipe_length) -end - -private class PipeSkipTailIterator[E] super Iterator[E] var source: Iterator[E] @@ -370,10 +379,8 @@ private class PipeSkipTailIterator[E] var lasts = new List[E] - init(source: Iterator[E], length: Int) + init do - self.source = source - self.length = length var lasts = self.lasts while source.is_ok and lasts.length < length do lasts.push(source.item) @@ -392,3 +399,57 @@ private class PipeSkipTailIterator[E] source.next end end + +private class PipeSelect[E] + super Iterator[E] + + var source: Iterator[E] + + var predicate: Function[E, Bool] + + init do do_skip + + fun do_skip + do + while source.is_ok and not predicate.apply(source.item) do source.next + end + + redef fun is_ok do return source.is_ok + + redef fun item do return source.item + + redef fun next + do + source.next + do_skip + end +end + +private class PipeMap[E, F] + super Iterator[F] + + var source: Iterator[E] + var function: Function[E, F] + + var item_cache: nullable F = null + var item_cached = false + + redef fun is_ok do return source.is_ok + + redef fun item do + if item_cached then return item_cache + item_cache = function.apply(source.item) + item_cached = true + return item_cache + end + + redef fun next do + source.next + item_cached = false + end +end + +# Stateless singleton that reify to the `to_s` method. +# +# assert fun_to_s.apply(5) == "5" +fun fun_to_s: Function[Object, String] do return once new FunctionToS