lib: new API for Comparator
[nit.git] / lib / pipeline.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Pipelined filters and operations on iterators.
16 #
17 # This module enhance `Iterator`s with some methods that enable a
18 # pipeline-like programing that offers the manupulation of
19 # collections trough connected filters with reasonable memory constraints.
20 module pipeline
21
22 redef interface Iterator[E]
23 # Filter: sort with `default_comparator`.
24 # SEE: `sort_with` for details
25 # REQUIRE: self isa Iterator[Comparable]
26 #
27 # assert [1,3,2].iterator.sort.to_a == [1,2,3]
28 fun sort: Iterator[E]
29 do
30 assert self isa Iterator[Comparable]
31 var a = self.to_a
32 default_comparator.sort(a)
33 return a.iterator
34 end
35
36 # Filter: sort with a given `comparator`.
37 # Important: require O(n) memory.
38 fun sort_with(comparator: Comparator): Iterator[E]
39 do
40 var a = self.to_a
41 comparator.sort(a)
42 return a.iterator
43 end
44
45 # Filter: reject duplicates.
46 # Elements already seen are rejected.
47 #
48 # Important: rely on `==` and `hash`
49 # Important: require O(m) in memory, where m is the total number of uniq items.
50 #
51 # assert [1,2,1,1,1,3,2].iterator.uniq.to_a == [1,2,3]
52 #
53 # REQUIRE: self isa Iterator[Object]
54 fun uniq: Iterator[E]
55 do
56 assert self isa Iterator[Object]
57 return new PipeUniq[E](self)
58 end
59
60 # Filter: reject continuous sequences of duplicates
61 #
62 # Important: rely on `==`.
63 #
64 # assert [1,2,1,1,1,3,2].iterator.seq_uniq.to_a == [1,2,1,3,2]
65 fun seq_uniq: Iterator[E]
66 do
67 return new PipeSeqUniq[E](self)
68 end
69
70 # Combine two iterators.
71 #
72 # When the first iterator is terminated, the second is started.
73 #
74 # assert ([1..20[.iterator + [20..40[.iterator).to_a == ([1..40[).to_a
75 fun +(other: Iterator[E]): Iterator[E]
76 do
77 return new PipeJoin[E](self, other)
78 end
79
80 # Alternate each item with `e`.
81 #
82 # assert [1,2,3].iterator.alternate(0).to_a == [1,0,2,0,3]
83 fun alternate(e: E): Iterator[E]
84 do
85 return new PipeAlternate[E](self, e)
86 end
87
88 # Filter: reject a given `item`.
89 #
90 # assert [1,1,2,1,3].iterator.skip(1).to_a == [2,3]
91 fun skip(item: E): Iterator[E]
92 do
93 return new PipeSkip[E](self, item)
94 end
95
96 # Filter: keep only the first `length` items.
97 #
98 # This filter does not always consume `self'.
99 #
100 # var i = [1,2,3,4,5].iterator
101 # assert i.head(2).to_a == [1,2]
102 # assert i.to_a == [3,4,5]
103 fun head(length: Int): Iterator[E]
104 do
105 return new PipeHead[E](self, length)
106 end
107
108 # Filter: reject the first `length` items.
109 #
110 # assert [1,2,3,4,5].iterator.skip_head(2).to_a == [3,4,5]
111 #
112 # ENSURE: self == return
113 fun skip_head(length: Int): Iterator[E]
114 do
115 while length > 0 and self.is_ok do
116 length -= 1
117 self.next
118 end
119 return self
120 end
121
122 # Filter: keep only the last `length` items.
123 #
124 # assert [1,2,3,4,5].iterator.tail(2).to_a == [4,5]
125 #
126 # Important: require O(length) in memory
127 fun tail(length: Int): Iterator[E]
128 do
129 var lasts = new List[E]
130 while self.is_ok do
131 while lasts.length >= length do lasts.shift
132 lasts.push(self.item)
133 self.next
134 end
135 return lasts.iterator
136 end
137
138 # Filter: reject the last `length` items.
139 #
140 # assert [1,2,3,4,5].iterator.skip_tail(2).to_a == [1,2,3]
141 #
142 # Important: require O(length) in memory
143 fun skip_tail(length: Int): Iterator[E]
144 do
145 return new PipeSkipTail[E](self, length)
146 end
147
148 # Filter: reject items that does not meet some criteria.
149 #
150 # class IsEvenFunction
151 # super Function[Int, Bool]
152 # redef fun apply(i) do return i % 2 == 0
153 # end
154 # assert [1,2,3,4,8].iterator.select(new IsEvenFunction).to_a == [2,4,8]
155 fun select(predicate: Function[E, Bool]): Iterator[E]
156 do
157 return new PipeSelect[E](self, predicate)
158 end
159 end
160
161 # Interface that reify a function.
162 # Concrete subclasses must implements the `apply` method.
163 #
164 # This interface helps to manipulate function-like objects.
165 #
166 # The main usage it as a transformation; that takes an argument and produce a result.
167 # See `map` for example.
168 #
169 # Another usage is as a predicate, with `Function[E, Bool]`.
170 # See `Iterator::select` for example.
171 #
172 # Function with more than one argument can be reified with some uncurification.
173 # Eg. `Function[ARG1, Function[ARG2, RES]]`.
174 #
175 # NOTE: Nit is not a functionnal language, this class is a very basic way to
176 # simulate the reification of a simple function.
177 interface Function[FROM, TO]
178 # How an element is mapped to another one.
179 fun apply(e: FROM): TO is abstract
180
181 # Filter: produce an iterator which each element is transformed.
182 #
183 # var i = [1,2,3].iterator
184 # assert fun_to_s.map(i).to_a == ["1", "2", "3"]
185 #
186 # Note: because there is no generic method in Nit (yet?),
187 # there is no way to have a better API.
188 # eg. with the Iterator as receiver and the function as argument.
189 # (see `Iterator::select`)
190 fun map(i: Iterator[FROM]): Iterator[TO]
191 do
192 return new PipeMap[FROM, TO](i, self)
193 end
194 end
195
196 private class FunctionToS
197 super Function[Object, String]
198 redef fun apply(e) do return e.to_s
199 end
200
201 ### Specific private iterator classes
202
203 private class PipeUniq[E]
204 super Iterator[E]
205
206 var source: Iterator[E]
207
208 var seen = new HashSet[Object] # FIXME HashSet[E]
209
210 redef fun is_ok do return source.is_ok
211
212 redef fun item do return source.item
213
214 redef fun next
215 do
216 self.seen.add(self.item.as(Object))
217 source.next
218 while source.is_ok and self.seen.has(source.item.as(Object)) do
219 source.next
220 end
221 end
222 end
223
224 private class PipeSeqUniq[E]
225 super Iterator[E]
226
227 var source: Iterator[E]
228
229 redef fun is_ok do return source.is_ok
230
231 redef fun item do return source.item
232
233 redef fun next
234 do
235 var seen = self.item
236 source.next
237 while source.is_ok and seen == source.item do
238 source.next
239 end
240 end
241 end
242
243 private class PipeJoin[E]
244 super Iterator[E]
245 var source1: Iterator[E]
246 var source2: Iterator[E]
247
248 redef fun is_ok
249 do
250 return source1.is_ok or source2.is_ok
251 end
252
253 redef fun item
254 do
255 if source1.is_ok then return source1.item else return source2.item
256 end
257
258 redef fun next
259 do
260 if source1.is_ok then source1.next else source2.next
261 end
262 end
263
264 private class PipeAlternate[E]
265 super Iterator[E]
266
267 var source: Iterator[E]
268 var odd_item: E
269 var odd = true
270
271 redef fun is_ok do return source.is_ok
272
273 redef fun item
274 do
275 if odd then
276 return source.item
277 else
278 return odd_item
279 end
280 end
281
282 redef fun next
283 do
284 if odd then
285 source.next
286 end
287 odd = not odd
288 end
289 end
290
291 private class PipeSkip[E]
292 super Iterator[E]
293
294 var source: Iterator[E]
295 var skip_item: E
296
297 init do do_skip
298
299 fun do_skip
300 do
301 while source.is_ok and source.item == skip_item do source.next
302 end
303
304 redef fun is_ok do return source.is_ok
305
306 redef fun item do return source.item
307
308 redef fun next
309 do
310 source.next
311 do_skip
312 end
313 end
314
315 private class PipeHead[E]
316 super Iterator[E]
317
318 var source: Iterator[E]
319
320 var length: Int
321
322 redef fun is_ok do return length > 0 and source.is_ok
323
324 redef fun item do return source.item
325
326 redef fun next
327 do
328 length -= 1
329 source.next
330 end
331 end
332
333 private class PipeSkipTail[E]
334 super Iterator[E]
335
336 var source: Iterator[E]
337
338 var length: Int
339
340 var lasts = new List[E]
341
342 init
343 do
344 var lasts = self.lasts
345 while source.is_ok and lasts.length < length do
346 lasts.push(source.item)
347 source.next
348 end
349 end
350
351 redef fun is_ok do return source.is_ok
352
353 redef fun item do return lasts.first
354
355 redef fun next
356 do
357 lasts.shift
358 lasts.push(source.item)
359 source.next
360 end
361 end
362
363 private class PipeSelect[E]
364 super Iterator[E]
365
366 var source: Iterator[E]
367
368 var predicate: Function[E, Bool]
369
370 init do do_skip
371
372 fun do_skip
373 do
374 while source.is_ok and not predicate.apply(source.item) do source.next
375 end
376
377 redef fun is_ok do return source.is_ok
378
379 redef fun item do return source.item
380
381 redef fun next
382 do
383 source.next
384 do_skip
385 end
386 end
387
388 private class PipeMap[E, F]
389 super Iterator[F]
390
391 var source: Iterator[E]
392 var function: Function[E, F]
393
394 var item_cache: nullable F = null
395 var item_cached = false
396
397 redef fun is_ok do return source.is_ok
398
399 redef fun item do
400 if item_cached then return item_cache
401 item_cache = function.apply(source.item)
402 item_cached = true
403 return item_cache
404 end
405
406 redef fun next do
407 source.next
408 item_cached = false
409 end
410 end
411
412 # Stateless singleton that reify to the `to_s` method.
413 #
414 # assert fun_to_s.apply(5) == "5"
415 fun fun_to_s: Function[Object, String] do return once new FunctionToS