be553ade0c79a8b7c7d294318b35faab7068ca8a
[nit.git] / lib / pipeline.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Pipelined filters and operations on iterators.
16 #
17 # This module enhances `Iterator` with some methods that enable a pipeline-like programing.
18 # The processing of elements in a pipeline is done trough connected filters that are implemented with reasonable memory constraints.
19 module pipeline
20
21 redef interface Iterator[E]
22 # Filter: sort with `default_comparator`.
23 # SEE: `sort_with` for details
24 # REQUIRE: self isa Iterator[Comparable]
25 #
26 # assert [1,3,2].iterator.sort.to_a == [1,2,3]
27 fun sort: Iterator[E]
28 do
29 assert self isa Iterator[Comparable]
30 var a = self.to_a
31 default_comparator.sort(a)
32 return a.iterator
33 end
34
35 # Filter: sort with a given `comparator`.
36 # Important: require O(n) memory.
37 #
38 # assert ["a", "c", "b"].iterator.sort_with(alpha_comparator).to_a == ["a", "b", "c"]
39 fun sort_with(comparator: Comparator): Iterator[E]
40 do
41 var a = self.to_a
42 comparator.sort(a)
43 return a.iterator
44 end
45
46 # Filter: reject duplicates.
47 # Elements already seen are rejected.
48 #
49 # Important: rely on `==` and `hash`
50 # Important: require O(m) in memory, where m is the total number of uniq items.
51 #
52 # assert [1,2,1,1,1,3,2].iterator.uniq.to_a == [1,2,3]
53 #
54 # REQUIRE: self isa Iterator[Object]
55 fun uniq: Iterator[E]
56 do
57 assert self isa Iterator[Object]
58 return new PipeUniq[E](self)
59 end
60
61 # Filter: reject continuous sequences of duplicates
62 #
63 # Important: rely on `==`.
64 #
65 # assert [1,2,1,1,1,3,2].iterator.seq_uniq.to_a == [1,2,1,3,2]
66 fun seq_uniq: Iterator[E]
67 do
68 return new PipeSeqUniq[E](self)
69 end
70
71 # Combine two iterators.
72 #
73 # When the first iterator is terminated, the second is started.
74 #
75 # assert ([1..20[.iterator + [20..40[.iterator).to_a == ([1..40[).to_a
76 fun +(other: Iterator[E]): Iterator[E]
77 do
78 return new PipeJoin[E](self, other)
79 end
80
81 # Alternate each item with `e`.
82 #
83 # assert [1,2,3].iterator.alternate(0).to_a == [1,0,2,0,3]
84 fun alternate(e: E): Iterator[E]
85 do
86 return new PipeAlternate[E](self, e)
87 end
88
89 # Filter: reject a given `item`.
90 #
91 # assert [1,1,2,1,3].iterator.skip(1).to_a == [2,3]
92 fun skip(item: E): Iterator[E]
93 do
94 return new PipeSkip[E](self, item)
95 end
96
97 # Filter: keep only the first `length` items.
98 #
99 # This filter does not always consume `self'.
100 #
101 # var i = [1,2,3,4,5].iterator
102 # assert i.head(2).to_a == [1,2]
103 # assert i.to_a == [3,4,5]
104 fun head(length: Int): Iterator[E]
105 do
106 return new PipeHead[E](self, length)
107 end
108
109 # Filter: reject the first `length` items.
110 #
111 # assert [1,2,3,4,5].iterator.skip_head(2).to_a == [3,4,5]
112 #
113 # ENSURE: self == return
114 fun skip_head(length: Int): Iterator[E]
115 do
116 while length > 0 and self.is_ok do
117 length -= 1
118 self.next
119 end
120 return self
121 end
122
123 # Filter: keep only the last `length` items.
124 #
125 # assert [1,2,3,4,5].iterator.tail(2).to_a == [4,5]
126 #
127 # Important: require O(length) in memory
128 fun tail(length: Int): Iterator[E]
129 do
130 var lasts = new List[E]
131 while self.is_ok do
132 while lasts.length >= length do lasts.shift
133 lasts.push(self.item)
134 self.next
135 end
136 return lasts.iterator
137 end
138
139 # Filter: reject the last `length` items.
140 #
141 # assert [1,2,3,4,5].iterator.skip_tail(2).to_a == [1,2,3]
142 #
143 # Important: require O(length) in memory
144 fun skip_tail(length: Int): Iterator[E]
145 do
146 return new PipeSkipTail[E](self, length)
147 end
148
149 # Filter: reject items that does not meet some criteria.
150 #
151 # class IsEvenFunction
152 # super Function[Int, Bool]
153 # redef fun apply(i) do return i % 2 == 0
154 # end
155 # assert [1,2,3,4,8].iterator.select(new IsEvenFunction).to_a == [2,4,8]
156 fun select(predicate: Function[E, Bool]): Iterator[E]
157 do
158 return new PipeSelect[E](self, predicate)
159 end
160 end
161
162 # Interface that reify a function.
163 # Concrete subclasses must implements the `apply` method.
164 #
165 # This interface helps to manipulate function-like objects.
166 #
167 # The main usage it as a transformation; that takes an argument and produce a result.
168 # See `map` for example.
169 #
170 # Another usage is as a predicate, with `Function[E, Bool]`.
171 # See `Iterator::select` for example.
172 #
173 # Function with more than one argument can be reified with some uncurification.
174 # Eg. `Function[ARG1, Function[ARG2, RES]]`.
175 #
176 # NOTE: Nit is not a functionnal language, this class is a very basic way to
177 # simulate the reification of a simple function.
178 interface Function[FROM, TO]
179 # How an element is mapped to another one.
180 fun apply(e: FROM): TO is abstract
181
182 # Filter: produce an iterator which each element is transformed.
183 #
184 # var i = [1,2,3].iterator
185 # assert fun_to_s.map(i).to_a == ["1", "2", "3"]
186 #
187 # Note: because there is no generic method in Nit (yet?),
188 # there is no way to have a better API.
189 # eg. with the Iterator as receiver and the function as argument.
190 # (see `Iterator::select`)
191 fun map(i: Iterator[FROM]): Iterator[TO]
192 do
193 return new PipeMap[FROM, TO](i, self)
194 end
195 end
196
197 private class FunctionToS
198 super Function[Object, String]
199 redef fun apply(e) do return e.to_s
200 end
201
202 ### Specific private iterator classes
203
204 private class PipeUniq[E]
205 super Iterator[E]
206
207 var source: Iterator[E]
208
209 var seen = new HashSet[Object] # FIXME HashSet[E]
210
211 redef fun is_ok do return source.is_ok
212
213 redef fun item do return source.item
214
215 redef fun next
216 do
217 self.seen.add(self.item.as(Object))
218 source.next
219 while source.is_ok and self.seen.has(source.item.as(Object)) do
220 source.next
221 end
222 end
223 end
224
225 private class PipeSeqUniq[E]
226 super Iterator[E]
227
228 var source: Iterator[E]
229
230 redef fun is_ok do return source.is_ok
231
232 redef fun item do return source.item
233
234 redef fun next
235 do
236 var seen = self.item
237 source.next
238 while source.is_ok and seen == source.item do
239 source.next
240 end
241 end
242 end
243
244 private class PipeJoin[E]
245 super Iterator[E]
246 var source1: Iterator[E]
247 var source2: Iterator[E]
248
249 redef fun is_ok
250 do
251 return source1.is_ok or source2.is_ok
252 end
253
254 redef fun item
255 do
256 if source1.is_ok then return source1.item else return source2.item
257 end
258
259 redef fun next
260 do
261 if source1.is_ok then source1.next else source2.next
262 end
263 end
264
265 private class PipeAlternate[E]
266 super Iterator[E]
267
268 var source: Iterator[E]
269 var odd_item: E
270 var odd = true
271
272 redef fun is_ok do return source.is_ok
273
274 redef fun item
275 do
276 if odd then
277 return source.item
278 else
279 return odd_item
280 end
281 end
282
283 redef fun next
284 do
285 if odd then
286 source.next
287 end
288 odd = not odd
289 end
290 end
291
292 private class PipeSkip[E]
293 super Iterator[E]
294
295 var source: Iterator[E]
296 var skip_item: E
297
298 init do do_skip
299
300 fun do_skip
301 do
302 while source.is_ok and source.item == skip_item do source.next
303 end
304
305 redef fun is_ok do return source.is_ok
306
307 redef fun item do return source.item
308
309 redef fun next
310 do
311 source.next
312 do_skip
313 end
314 end
315
316 private class PipeHead[E]
317 super Iterator[E]
318
319 var source: Iterator[E]
320
321 var length: Int
322
323 redef fun is_ok do return length > 0 and source.is_ok
324
325 redef fun item do return source.item
326
327 redef fun next
328 do
329 length -= 1
330 source.next
331 end
332 end
333
334 private class PipeSkipTail[E]
335 super Iterator[E]
336
337 var source: Iterator[E]
338
339 var length: Int
340
341 var lasts = new List[E]
342
343 init
344 do
345 var lasts = self.lasts
346 while source.is_ok and lasts.length < length do
347 lasts.push(source.item)
348 source.next
349 end
350 end
351
352 redef fun is_ok do return source.is_ok
353
354 redef fun item do return lasts.first
355
356 redef fun next
357 do
358 lasts.shift
359 lasts.push(source.item)
360 source.next
361 end
362 end
363
364 private class PipeSelect[E]
365 super Iterator[E]
366
367 var source: Iterator[E]
368
369 var predicate: Function[E, Bool]
370
371 init do do_skip
372
373 fun do_skip
374 do
375 while source.is_ok and not predicate.apply(source.item) do source.next
376 end
377
378 redef fun is_ok do return source.is_ok
379
380 redef fun item do return source.item
381
382 redef fun next
383 do
384 source.next
385 do_skip
386 end
387 end
388
389 private class PipeMap[E, F]
390 super Iterator[F]
391
392 var source: Iterator[E]
393 var function: Function[E, F]
394
395 var item_cache: nullable F = null
396 var item_cached = false
397
398 redef fun is_ok do return source.is_ok
399
400 redef fun item do
401 if item_cached then return item_cache
402 item_cache = function.apply(source.item)
403 item_cached = true
404 return item_cache
405 end
406
407 redef fun next do
408 source.next
409 item_cached = false
410 end
411 end
412
413 # Stateless singleton that reify to the `to_s` method.
414 #
415 # assert fun_to_s.apply(5) == "5"
416 fun fun_to_s: Function[Object, String] do return once new FunctionToS