pep8analysis: add copyright info for viz.js
[nit.git] / lib / pipeline.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Pipelined filters and operations on iterators.
16 #
17 # This module enhance `Iterator`s with some methods that enable a
18 # pipeline-like programing that offers the manupulation of
19 # collections trough connected filters with reasonable memory constraints.
20 module pipeline
21
22 redef interface Iterator[E]
23 # Filter: sort with `default_comparator`.
24 # SEE: `sort_with` for details
25 # REQUIRE: self isa Iterator[Comparable]
26 #
27 # assert [1,3,2].iterator.sort.to_a == [1,2,3]
28 fun sort: Iterator[E]
29 do
30 assert self isa Iterator[Comparable]
31 var a = self.to_a
32 default_comparator.sort(a)
33 return a.iterator
34 end
35
36 # Filter: sort with a given `comparator`.
37 # Important: require O(n) memory.
38 fun sort_with(comparator: Comparator[E]): Iterator[E]
39 do
40 var a = self.to_a
41 comparator.sort(a)
42 return a.iterator
43 end
44
45 # Filter: reject duplicates.
46 # Elements already seen are rejected.
47 #
48 # Important: rely on `==` and `hash`
49 # Important: require O(m) in memory, where m is the total number of uniq items.
50 #
51 # assert [1,2,1,1,1,3,2].iterator.uniq.to_a == [1,2,3]
52 #
53 # REQUIRE: self isa Iterator[Object]
54 fun uniq: Iterator[E]
55 do
56 assert self isa Iterator[Object]
57 return new PipeUniq[E](self)
58 end
59
60 # Filter: reject continuous sequences of duplicates
61 #
62 # Important: rely on `==`.
63 #
64 # assert [1,2,1,1,1,3,2].iterator.seq_uniq.to_a == [1,2,1,3,2]
65 fun seq_uniq: Iterator[E]
66 do
67 return new PipeSeqUniq[E](self)
68 end
69
70 # Combine two iterators.
71 #
72 # When the first iterator is terminated, the second is started.
73 #
74 # assert ([1..20[.iterator + [20..40[.iterator).to_a == ([1..40[).to_a
75 fun +(other: Iterator[E]): Iterator[E]
76 do
77 return new PipeJoin[E](self, other)
78 end
79
80 # Alternate each item with `e`.
81 #
82 # assert [1,2,3].iterator.alternate(0).to_a == [1,0,2,0,3]
83 fun alternate(e: E): Iterator[E]
84 do
85 return new PipeAlternate[E](self, e)
86 end
87
88 # Filter: reject a given `item`.
89 #
90 # assert [1,1,2,1,3].iterator.skip(1).to_a == [2,3]
91 fun skip(item: E): Iterator[E]
92 do
93 return new PipeSkip[E](self, item)
94 end
95
96 # Filter: keep only the first `length` items.
97 #
98 # This filter does not always consume `self'.
99 #
100 # var i = [1,2,3,4,5].iterator
101 # assert i.head(2).to_a == [1,2]
102 # assert i.to_a == [3,4,5]
103 fun head(length: Int): Iterator[E]
104 do
105 return new PipeHead[E](self, length)
106 end
107
108 # Filter: reject the first `length` items.
109 #
110 # assert [1,2,3,4,5].iterator.skip_head(2).to_a == [3,4,5]
111 #
112 # ENSURE: self == return
113 fun skip_head(length: Int): Iterator[E]
114 do
115 while length > 0 and self.is_ok do
116 length -= 1
117 self.next
118 end
119 return self
120 end
121
122 # Filter: keep only the last `length` items.
123 #
124 # assert [1,2,3,4,5].iterator.tail(2).to_a == [4,5]
125 #
126 # Important: require O(length) in memory
127 fun tail(length: Int): Iterator[E]
128 do
129 var lasts = new List[E]
130 while self.is_ok do
131 while lasts.length >= length do lasts.shift
132 lasts.push(self.item)
133 self.next
134 end
135 return lasts.iterator
136 end
137
138 # Filter: reject the last `length` items.
139 #
140 # assert [1,2,3,4,5].iterator.skip_tail(2).to_a == [1,2,3]
141 #
142 # Important: require O(length) in memory
143 fun skip_tail(length: Int): Iterator[E]
144 do
145 return new PipeSkipTail[E](self, length)
146 end
147
148 # Filter: reject items that does not meet some criteria.
149 #
150 # class IsEvenFunction
151 # super Function[Int, Bool]
152 # redef fun apply(i) do return i % 2 == 0
153 # end
154 # assert [1,2,3,4,8].iterator.select(new IsEvenFunction).to_a == [2,4,8]
155 fun select(predicate: Function[E, Bool]): Iterator[E]
156 do
157 return new PipeSelect[E](self, predicate)
158 end
159 end
160
161 # Interface that reify a function.
162 # Concrete subclasses must implements the `apply` method.
163 #
164 # This interface helps to manipulate function-like objects.
165 #
166 # The main usage it as a transformation; that takes an argument and produce a result.
167 # See `map` for example.
168 #
169 # Another usage is as a predicate, with `Function[E, Bool]`.
170 # See `Iterator::select` for example.
171 #
172 # Function with more than one argument can be reified with some uncurification.
173 # Eg. `Function[ARG1, Function[ARG2, RES]]`.
174 #
175 # NOTE: Nit is not a functionnal language, this class is a very basic way to
176 # simulate the reification of a simple function.
177 interface Function[FROM, TO]
178 # How an element is mapped to another one.
179 fun apply(e: FROM): TO is abstract
180
181 # Filter: produce an iterator which each element is transformed.
182 #
183 # var i = [1,2,3].iterator
184 # assert fun_to_s.map(i).to_a == ["1", "2", "3"]
185 #
186 # Note: because there is no generic method in Nit (yet?),
187 # there is no way to have a better API.
188 # eg. with the Iterator as receiver and the function as argument.
189 # (see `Iterator::select`)
190 fun map(i: Iterator[FROM]): Iterator[TO]
191 do
192 return new PipeMap[FROM, TO](i, self)
193 end
194 end
195
196 private class FunctionToS
197 super Function[Object, String]
198 redef fun apply(e) do return e.to_s
199 end
200
201 ### Specific private iterator classes
202
203 private class PipeUniq[E]
204 super Iterator[E]
205
206 var source: Iterator[E]
207
208 var seen = new HashSet[Object] # FIXME HashSet[E]
209
210 redef fun is_ok do return source.is_ok
211
212 redef fun item do return source.item
213
214 redef fun next
215 do
216 self.seen.add(self.item.as(Object))
217 source.next
218 while source.is_ok and self.seen.has(source.item.as(Object)) do
219 source.next
220 end
221 end
222 end
223
224 private class PipeSeqUniq[E]
225 super Iterator[E]
226
227 var source: Iterator[E]
228
229 redef fun is_ok do return source.is_ok
230
231 redef fun item do return source.item
232
233 redef fun next
234 do
235 var seen = self.item
236 source.next
237 while source.is_ok and seen == source.item do
238 source.next
239 end
240 end
241 end
242
243 private class PipeJoin[E]
244 super Iterator[E]
245 var source1: Iterator[E]
246 var source2: Iterator[E]
247
248 redef fun is_ok
249 do
250 return source1.is_ok or source2.is_ok
251 end
252
253 redef fun item
254 do
255 if source1.is_ok then return source1.item else return source2.item
256 end
257
258 redef fun next
259 do
260 if source1.is_ok then source1.next else source2.next
261 end
262 end
263
264 private class PipeAlternate[E]
265 super Iterator[E]
266
267 var source: Iterator[E]
268 var odd_item: E
269 var odd = true
270
271 redef fun is_ok do return source.is_ok
272
273 redef fun item
274 do
275 if odd then
276 return source.item
277 else
278 return odd_item
279 end
280 end
281
282 redef fun next
283 do
284 if odd then
285 source.next
286 end
287 odd = not odd
288 end
289 end
290
291 private class PipeSkip[E]
292 super Iterator[E]
293
294 var source: Iterator[E]
295 var skip_item: E
296
297 init(source: Iterator[E], skip_item: E)
298 do
299 self.source = source
300 self.skip_item = skip_item
301
302 do_skip
303 end
304
305 fun do_skip
306 do
307 while source.is_ok and source.item == skip_item do source.next
308 end
309
310 redef fun is_ok do return source.is_ok
311
312 redef fun item do return source.item
313
314 redef fun next
315 do
316 source.next
317 do_skip
318 end
319 end
320
321 private class PipeHead[E]
322 super Iterator[E]
323
324 var source: Iterator[E]
325
326 var length: Int
327
328 redef fun is_ok do return length > 0 and source.is_ok
329
330 redef fun item do return source.item
331
332 redef fun next
333 do
334 length -= 1
335 source.next
336 end
337 end
338
339 private class PipeSkipTail[E]
340 super Iterator[E]
341
342 var source: Iterator[E]
343
344 var length: Int
345
346 var lasts = new List[E]
347
348 init(source: Iterator[E], length: Int)
349 do
350 self.source = source
351 self.length = length
352 var lasts = self.lasts
353 while source.is_ok and lasts.length < length do
354 lasts.push(source.item)
355 source.next
356 end
357 end
358
359 redef fun is_ok do return source.is_ok
360
361 redef fun item do return lasts.first
362
363 redef fun next
364 do
365 lasts.shift
366 lasts.push(source.item)
367 source.next
368 end
369 end
370
371 private class PipeSelect[E]
372 super Iterator[E]
373
374 var source: Iterator[E]
375
376 var predicate: Function[E, Bool]
377
378 init(source: Iterator[E], predicate: Function[E, Bool])
379 do
380 self.source = source
381 self.predicate = predicate
382
383 do_skip
384 end
385
386 fun do_skip
387 do
388 while source.is_ok and not predicate.apply(source.item) do source.next
389 end
390
391 redef fun is_ok do return source.is_ok
392
393 redef fun item do return source.item
394
395 redef fun next
396 do
397 source.next
398 do_skip
399 end
400 end
401
402 private class PipeMap[E, F]
403 super Iterator[F]
404
405 var source: Iterator[E]
406 var function: Function[E, F]
407
408 var item_cache: nullable F = null
409 var item_cached = false
410
411 redef fun is_ok do return source.is_ok
412
413 redef fun item do
414 if item_cached then return item_cache
415 item_cache = function.apply(source.item)
416 item_cached = true
417 return item_cache
418 end
419
420 redef fun next do
421 source.next
422 item_cached = false
423 end
424 end
425
426 # Stateless singleton that reify to the `to_s` method.
427 #
428 # assert fun_to_s.apply(5) == "5"
429 fun fun_to_s: Function[Object, String] do return once new FunctionToS