lib: modify lib/pipeline to work on collection instead of iterators.
[nit.git] / lib / pipeline.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Pipelined filters and operations on iterators.
16 #
17 # This module enhance `Collection`s with some methods that enable a
18 # pipeline-like programing that offers the manupulation of
19 # collections trough connected filters with reasonable memory constraints.
20 #
21 # The filters methods return a view bounded on the original collection.
22 # Filters can be chained to build complex virtual collections.
23 #
24 # FIXME: Fix vocabulary (lazy/view/filter)
25 # FIXME: Maybe the name of the method should indicate that the method is 'lazy' (or is a filter).
26 module pipeline
27
28 redef interface Collection[E]
29 # Filter: sort with ComparableSorter.
30 # SEE: `sort_with` for details
31 # REQUIRE: self isa Iterator[Comparable]
32 #
33 # [1,3,2].sort_filter #=> [1,2,3]
34 fun sort_filter: Collection[E]
35 do
36 assert self isa Collection[Comparable]
37 var sorter = new ComparableSorter[Comparable]
38 var a = self.to_a
39 sorter.sort(a)
40 return a
41 end
42
43 # Filter: sort with a given `sorter`.
44 # Important: require O(n) memory.
45 #
46 # REQUIRE: self isa Iterator[Object]
47 # FIXME: AbstractSorter[E] is refused
48 fun sort_with(sorter: AbstractSorter[Object]): Collection[E]
49 do
50 assert self isa Collection[Object]
51 var a = self.to_a
52 sorter.sort(a)
53 return a
54 end
55
56 # Filter: reject duplicates.
57 # Elements already seen are rejected.
58 #
59 # Important: rely on `==` and `hash`
60 # Important: require O(m) in memory, where m is the total number of uniq items.
61 #
62 # [1,2,1,1,1,3,2].uniq #=> [1,2,3]
63 #
64 # REQUIRE: self isa Iterator[Object]
65 fun uniq: Collection[E]
66 do
67 assert self isa Collection[Object]
68 return new PipeUniq[E](self)
69 end
70
71 # Filter: reject continuous sequences of duplicates
72 #
73 # Important: rely on `==`.
74 #
75 # [1,2,1,1,1,3,2].seq_uniq #=> [1,2,1,3,2]
76 fun seq_uniq: Collection[E]
77 do
78 return new PipeSeqUniq[E](self)
79 end
80
81 # The view of two combined collections.
82 #
83 # ([1..20[ + [20..40[) #=> [1..40[
84 fun +(other: Collection[E]): Collection[E]
85 do
86 return new PipeJoin[E](self, other)
87 end
88
89 # Alternate each item with `e`.
90 #
91 # [1,2,3].alternate(0) #=> [1,0,2,0,3]
92 fun alternate(e: E): Collection[E]
93 do
94 return new PipeAlternate[E](self, e)
95 end
96
97 # Filter: reject a given `item`.
98 #
99 # [1,1,2,1,3].skip(1) #=> [2,3]
100 fun skip(item: E): Collection[E]
101 do
102 return new PipeSkip[E](self, item)
103 end
104
105 # Filter: keep only the first `length` items.
106 #
107 # [1,2,3,4,5].head(2) #=> [1,2]
108 fun head(length: Int): Collection[E]
109 do
110 return new PipeHead[E](self, length)
111 end
112
113 # Filter: reject the first `length` items.
114 #
115 # [1,2,3,4,5].skip_head(2) #=> [3,4,5]
116 #
117 # ENSURE: self == return
118 fun skip_head(length: Int): Collection[E]
119 do
120 return new PipeSkipHead[E](self, length)
121 end
122
123 # Filter: keep only the last `length` items.
124 #
125 # [1,2,3,4,5].tail(2) #=> [4,5]
126 #
127 # Important: require O(length) in memory
128 fun tail(length: Int): Collection[E]
129 do
130 return new PipeTail[E](self, length)
131 end
132
133 # Filter: reject the last `length` items.
134 #
135 # [1,2,3,4,5].skip_tail(2) #=> [1,2,3]
136 #
137 # Important: require O(length) in memory
138 fun skip_tail(length: Int): Collection[E]
139 do
140 return new PipeSkipTail[E](self, length)
141 end
142 end
143
144 ### Specific private collection and iterator classes
145
146 private class PipeUniq[E]
147 super NaiveCollection[E]
148 var source: Collection[E]
149 redef fun iterator do return new PipeUniqIterator[E](source.iterator)
150 end
151
152 private class PipeUniqIterator[E]
153 super Iterator[E]
154
155 var source: Iterator[E]
156
157 var seen = new HashSet[Object] # FIXME HashSet[E]
158
159 redef fun is_ok do return source.is_ok
160
161 redef fun item do return source.item
162
163 redef fun next
164 do
165 self.seen.add(self.item.as(Object))
166 source.next
167 while source.is_ok and self.seen.has(source.item.as(Object)) do
168 source.next
169 end
170 end
171 end
172
173 private class PipeSeqUniq[E]
174 super NaiveCollection[E]
175 var source: Collection[E]
176 redef fun iterator do return new PipeSeqUniqIterator[E](source.iterator)
177 end
178
179 private class PipeSeqUniqIterator[E]
180 super Iterator[E]
181
182 var source: Iterator[E]
183
184 redef fun is_ok do return source.is_ok
185
186 redef fun item do return source.item
187
188 redef fun next
189 do
190 var seen = self.item
191 source.next
192 while source.is_ok and seen == source.item do
193 source.next
194 end
195 end
196 end
197
198 private class PipeJoin[E]
199 super NaiveCollection[E]
200 var source1: Collection[E]
201 var source2: Collection[E]
202 redef fun iterator do return new PipeJoinIterator[E](source1.iterator, source2.iterator)
203 end
204
205 private class PipeJoinIterator[E]
206 super Iterator[E]
207 var source1: Iterator[E]
208 var source2: Iterator[E]
209
210 redef fun is_ok
211 do
212 return source1.is_ok or source2.is_ok
213 end
214
215 redef fun item
216 do
217 if source1.is_ok then return source1.item else return source2.item
218 end
219
220 redef fun next
221 do
222 if source1.is_ok then source1.next else source2.next
223 end
224 end
225
226 private class PipeAlternate[E]
227 super NaiveCollection[E]
228 var source: Collection[E]
229 var odd_item: E
230 redef fun iterator do return new PipeAlternateIterator[E](source.iterator, odd_item)
231 end
232
233 private class PipeAlternateIterator[E]
234 super Iterator[E]
235
236 var source: Iterator[E]
237 var odd_item: E
238 var odd = true
239
240 redef fun is_ok do return source.is_ok
241
242 redef fun item
243 do
244 if odd then
245 return source.item
246 else
247 return odd_item
248 end
249 end
250
251 redef fun next
252 do
253 if odd then
254 source.next
255 end
256 odd = not odd
257 end
258 end
259
260 private class PipeSkip[E]
261 super NaiveCollection[E]
262 var source: Collection[E]
263 var skip_item: E
264 redef fun iterator do return new PipeSkipIterator[E](source.iterator, skip_item)
265 end
266
267 private class PipeSkipIterator[E]
268 super NaiveCollection[E]
269 super Iterator[E]
270
271 var source: Iterator[E]
272 var skip_item: E
273
274 init(source: Iterator[E], skip_item: E)
275 do
276 self.source = source
277 self.skip_item = skip_item
278
279 do_skip
280 end
281
282 fun do_skip
283 do
284 while source.is_ok and source.item == skip_item do source.next
285 end
286
287 redef fun is_ok do return source.is_ok
288
289 redef fun item do return source.item
290
291 redef fun next
292 do
293 source.next
294 do_skip
295 end
296 end
297
298 private class PipeHead[E]
299 super NaiveCollection[E]
300 var source: Collection[E]
301 var pipe_length: Int
302 redef fun iterator do return new PipeHeadIterator[E](source.iterator, pipe_length)
303 end
304
305 private class PipeHeadIterator[E]
306 super Iterator[E]
307
308 var source: Iterator[E]
309
310 var length: Int
311
312 redef fun is_ok do return length > 0 and source.is_ok
313
314 redef fun item do return source.item
315
316 redef fun next
317 do
318 length -= 1
319 source.next
320 end
321 end
322
323 private class PipeSkipHead[E]
324 super NaiveCollection[E]
325 var source: Collection[E]
326 var pipe_length: Int
327 redef fun iterator
328 do
329 var res = source.iterator
330 var length = pipe_length
331 while length > 0 and res.is_ok do
332 length -= 1
333 res.next
334 end
335 return res
336 end
337 end
338
339 private class PipeTail[E]
340 super NaiveCollection[E]
341 var source: Collection[E]
342 var pipe_length: Int
343 redef fun iterator
344 do
345 var lasts = new List[E]
346 var res = source.iterator
347 var length = pipe_length
348 while res.is_ok do
349 while lasts.length >= length do lasts.shift
350 lasts.push(res.item)
351 res.next
352 end
353 return lasts.iterator
354 end
355 end
356
357 private class PipeSkipTail[E]
358 super NaiveCollection[E]
359 var source: Collection[E]
360 var pipe_length: Int
361 redef fun iterator do return new PipeSkipTailIterator[E](source.iterator, pipe_length)
362 end
363
364 private class PipeSkipTailIterator[E]
365 super Iterator[E]
366
367 var source: Iterator[E]
368
369 var length: Int
370
371 var lasts = new List[E]
372
373 init(source: Iterator[E], length: Int)
374 do
375 self.source = source
376 self.length = length
377 var lasts = self.lasts
378 while source.is_ok and lasts.length < length do
379 lasts.push(source.item)
380 source.next
381 end
382 end
383
384 redef fun is_ok do return source.is_ok
385
386 redef fun item do return lasts.first
387
388 redef fun next
389 do
390 lasts.shift
391 lasts.push(source.item)
392 source.next
393 end
394 end