X-Git-Url: http://nitlanguage.org diff --git a/lib/pipeline.nit b/lib/pipeline.nit index 28e2828..54be22b 100644 --- a/lib/pipeline.nit +++ b/lib/pipeline.nit @@ -14,36 +14,32 @@ # Pipelined filters and operations on iterators. # -# This module enhance `Iterator`s with some methods that enable a -# pipeline-like programing that offers the manupulation of -# collections trough connected filters with reasonable memory constraints. +# This module enhances `Iterator` with some methods that enable a pipeline-like programing. +# The processing of elements in a pipeline is done trough connected filters that are implemented with reasonable memory constraints. module pipeline redef interface Iterator[E] - # Filter: sort with ComparableSorter. + # Filter: sort with `default_comparator`. # SEE: `sort_with` for details # REQUIRE: self isa Iterator[Comparable] # - # [1,3,2].iterator.sort.to_a #=> [1,2,3] + # assert [1,3,2].iterator.sort.to_a == [1,2,3] fun sort: Iterator[E] do assert self isa Iterator[Comparable] - var sorter = new ComparableSorter[Comparable] var a = self.to_a - sorter.sort(a) + default_comparator.sort(a) return a.iterator end - # Filter: sort with a given `sorter`. + # Filter: sort with a given `comparator`. # Important: require O(n) memory. # - # REQUIRE: self isa Iterator[Object] - # FIXME: AbstractSorter[E] is refused - fun sort_with(sorter: AbstractSorter[Object]): Iterator[E] + # assert ["a", "c", "b"].iterator.sort_with(alpha_comparator).to_a == ["a", "b", "c"] + fun sort_with(comparator: Comparator): Iterator[E] do - assert self isa Iterator[Object] var a = self.to_a - sorter.sort(a) + comparator.sort(a) return a.iterator end @@ -53,7 +49,7 @@ redef interface Iterator[E] # Important: rely on `==` and `hash` # Important: require O(m) in memory, where m is the total number of uniq items. # - # [1,2,1,1,1,3,2].iterator.uniq.to_a #=> [1,2,3] + # assert [1,2,1,1,1,3,2].iterator.uniq.to_a == [1,2,3] # # REQUIRE: self isa Iterator[Object] fun uniq: Iterator[E] @@ -66,7 +62,7 @@ redef interface Iterator[E] # # Important: rely on `==`. # - # [1,2,1,1,1,3,2].iterator.uniq.to_a #=> [1,2,1,3,2] + # assert [1,2,1,1,1,3,2].iterator.seq_uniq.to_a == [1,2,1,3,2] fun seq_uniq: Iterator[E] do return new PipeSeqUniq[E](self) @@ -76,7 +72,9 @@ redef interface Iterator[E] # # When the first iterator is terminated, the second is started. # - # ([1,2].iterator + [3,4].iterator).to_a #=> [1,2,3,4] + # assert ([1..20[.iterator + [20..40[.iterator).to_a == ([1..40[).to_a + # + # SEE: `Iterator2` fun +(other: Iterator[E]): Iterator[E] do return new PipeJoin[E](self, other) @@ -84,7 +82,7 @@ redef interface Iterator[E] # Alternate each item with `e`. # - # [1,2,3].iterator.alternate(0).to_a #=> [1,0,2,0,3] + # assert [1,2,3].iterator.alternate(0).to_a == [1,0,2,0,3] fun alternate(e: E): Iterator[E] do return new PipeAlternate[E](self, e) @@ -92,7 +90,7 @@ redef interface Iterator[E] # Filter: reject a given `item`. # - # [1,1,2,1,3].iterator.skip(1).to_a #=> [2,3] + # assert [1,1,2,1,3].iterator.skip(1).to_a == [2,3] fun skip(item: E): Iterator[E] do return new PipeSkip[E](self, item) @@ -102,9 +100,9 @@ redef interface Iterator[E] # # This filter does not always consume `self'. # - # var i = [1,2,3,4,5].iterator - # i.head(2).to_a #=> [1,2] - # i.to_a #=> [3,4,5] + # var i = [1,2,3,4,5].iterator + # assert i.head(2).to_a == [1,2] + # assert i.to_a == [3,4,5] fun head(length: Int): Iterator[E] do return new PipeHead[E](self, length) @@ -112,7 +110,7 @@ redef interface Iterator[E] # Filter: reject the first `length` items. # - # [1,2,3,4,5].iterator.skip_head(2).to_a #=> [3,4,5] + # assert [1,2,3,4,5].iterator.skip_head(2).to_a == [3,4,5] # # ENSURE: self == return fun skip_head(length: Int): Iterator[E] @@ -126,7 +124,7 @@ redef interface Iterator[E] # Filter: keep only the last `length` items. # - # [1,2,3,4,5].iterator.tail(2).to_a #=> [4,5] + # assert [1,2,3,4,5].iterator.tail(2).to_a == [4,5] # # Important: require O(length) in memory fun tail(length: Int): Iterator[E] @@ -142,13 +140,193 @@ redef interface Iterator[E] # Filter: reject the last `length` items. # - # [1,2,3,4,5].iterator.skip_tail(2).to_a #=> [1,2,3] + # assert [1,2,3,4,5].iterator.skip_tail(2).to_a == [1,2,3] # # Important: require O(length) in memory fun skip_tail(length: Int): Iterator[E] do return new PipeSkipTail[E](self, length) end + + # Filter: reject items that does not meet some criteria. + # + # class IsEvenFunction + # super Function[Int, Bool] + # redef fun apply(i) do return i % 2 == 0 + # end + # assert [1,2,3,4,8].iterator.select(new IsEvenFunction).to_a == [2,4,8] + fun select(predicate: Function[E, Bool]): Iterator[E] + do + return new PipeSelect[E](self, predicate) + end +end + +# Concatenates a sequence of iterators. +# +# Wraps an iterator of sub-iterators and iterates over the elements of the +# sub-iterators. +# +# ~~~nit +# var i: Iterator[Int] +# var empty = new Array[Int] +# +# i = new Iterator2[Int]([ +# [1, 2, 3].iterator, +# empty.iterator, +# [4, 5].iterator +# ].iterator) +# assert i.to_a == [1, 2, 3, 4, 5] +# +# i = new Iterator2[Int]([ +# empty.iterator, +# [42].iterator, +# empty.iterator +# ].iterator) +# assert i.to_a == [42] +# ~~~ +# +# SEE: `Iterator::+` +class Iterator2[E] + super Iterator[E] + + # The inner iterator over sub-iterators. + var inner: Iterator[Iterator[E]] + + redef fun finish + do + var i = current_iterator + if i != null then i.finish + end + + redef fun is_ok + do + var i = current_iterator + if i == null then return false + return i.is_ok + end + + redef fun item + do + var i = current_iterator + assert i != null + return i.item + end + + redef fun next + do + var i = current_iterator + assert i != null + i.next + end + + redef fun start + do + var i = current_iterator + if i != null then i.start + end + + private var previous_iterator: nullable Iterator[E] = null + + private fun current_iterator: nullable Iterator[E] + do + if previous_iterator == null then + # Get the first sub-iterator. + if inner.is_ok then + previous_iterator = inner.item + previous_iterator.start + inner.next + else + return null + end + end + # Get the first sub-iterator that has a current item. + while inner.is_ok and not previous_iterator.is_ok do + previous_iterator.finish + previous_iterator = inner.item + previous_iterator.start + inner.next + end + return previous_iterator + end +end + +# Wraps an iterator to skip nulls. +# +# ~~~nit +# var i: Iterator[Int] +# +# i = new NullSkipper[Int]([null, 1, null, 2, null: nullable Int].iterator) +# assert i.to_a == [1, 2] +# +# i = new NullSkipper[Int]([1, null, 2, 3: nullable Int].iterator) +# assert i.to_a == [1, 2, 3] +# ~~~ +class NullSkipper[E: Object] + super Iterator[E] + + # The inner iterator. + var inner: Iterator[nullable E] + + redef fun finish do inner.finish + + redef fun is_ok do + skip_nulls + return inner.is_ok + end + + redef fun item do + skip_nulls + return inner.item.as(E) + end + + redef fun next do + inner.next + skip_nulls + end + + private fun skip_nulls do + while inner.is_ok and inner.item == null do inner.next + end +end + +# Interface that reify a function. +# Concrete subclasses must implements the `apply` method. +# +# This interface helps to manipulate function-like objects. +# +# The main usage it as a transformation; that takes an argument and produce a result. +# See `map` for example. +# +# Another usage is as a predicate, with `Function[E, Bool]`. +# See `Iterator::select` for example. +# +# Function with more than one argument can be reified with some uncurification. +# Eg. `Function[ARG1, Function[ARG2, RES]]`. +# +# NOTE: Nit is not a functionnal language, this class is a very basic way to +# simulate the reification of a simple function. +interface Function[FROM, TO] + # How an element is mapped to another one. + fun apply(e: FROM): TO is abstract + + # Filter: produce an iterator which each element is transformed. + # + # var i = [1,2,3].iterator + # assert fun_to_s.map(i).to_a == ["1", "2", "3"] + # + # Note: because there is no generic method in Nit (yet?), + # there is no way to have a better API. + # eg. with the Iterator as receiver and the function as argument. + # (see `Iterator::select`) + fun map(i: Iterator[FROM]): Iterator[TO] + do + return new PipeMap[FROM, TO](i, self) + end +end + +private class FunctionToS + super Function[Object, String] + redef fun apply(e) do return e.to_s end ### Specific private iterator classes @@ -247,13 +425,7 @@ private class PipeSkip[E] var source: Iterator[E] var skip_item: E - init(source: Iterator[E], skip_item: E) - do - self.source = source - self.skip_item = skip_item - - do_skip - end + init do do_skip fun do_skip do @@ -298,10 +470,8 @@ private class PipeSkipTail[E] var lasts = new List[E] - init(source: Iterator[E], length: Int) + init do - self.source = source - self.length = length var lasts = self.lasts while source.is_ok and lasts.length < length do lasts.push(source.item) @@ -320,3 +490,57 @@ private class PipeSkipTail[E] source.next end end + +private class PipeSelect[E] + super Iterator[E] + + var source: Iterator[E] + + var predicate: Function[E, Bool] + + init do do_skip + + fun do_skip + do + while source.is_ok and not predicate.apply(source.item) do source.next + end + + redef fun is_ok do return source.is_ok + + redef fun item do return source.item + + redef fun next + do + source.next + do_skip + end +end + +private class PipeMap[E, F] + super Iterator[F] + + var source: Iterator[E] + var function: Function[E, F] + + var item_cache: nullable F = null + var item_cached = false + + redef fun is_ok do return source.is_ok + + redef fun item do + if item_cached then return item_cache + item_cache = function.apply(source.item) + item_cached = true + return item_cache + end + + redef fun next do + source.next + item_cached = false + end +end + +# Stateless singleton that reify to the `to_s` method. +# +# assert fun_to_s.apply(5) == "5" +fun fun_to_s: Function[Object, String] do return once new FunctionToS