migration: use Comparator instead of Sorter in code
[nit.git] / lib / pipeline.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Pipelined filters and operations on iterators.
16 #
17 # This module enhance `Collection`s with some methods that enable a
18 # pipeline-like programing that offers the manupulation of
19 # collections trough connected filters with reasonable memory constraints.
20 #
21 # The filters methods return a view bounded on the original collection.
22 # Filters can be chained to build complex virtual collections.
23 #
24 # FIXME: Fix vocabulary (lazy/view/filter)
25 # FIXME: Maybe the name of the method should indicate that the method is 'lazy' (or is a filter).
26 module pipeline
27
28 redef interface Collection[E]
29 # Filter: sort with `default_comparator`.
30 # SEE: `sort_with` for details
31 # REQUIRE: self isa Iterator[Comparable]
32 #
33 # assert [1,3,2].sort_filter.to_a == [1,2,3]
34 fun sort_filter: Collection[E]
35 do
36 assert self isa Collection[Comparable]
37 var a = self.to_a
38 default_comparator.sort(a)
39 return a
40 end
41
42 # Filter: sort with a given `comparator`.
43 # Important: require O(n) memory.
44 fun sort_with(comparator: Comparator[E]): Collection[E]
45 do
46 var a = self.to_a
47 comparator.sort(a)
48 return a
49 end
50
51 # Filter: reject duplicates.
52 # Elements already seen are rejected.
53 #
54 # Important: rely on `==` and `hash`
55 # Important: require O(m) in memory, where m is the total number of uniq items.
56 #
57 # assert [1,2,1,1,1,3,2].uniq.to_a == [1,2,3]
58 #
59 # REQUIRE: self isa Iterator[Object]
60 fun uniq: Collection[E]
61 do
62 assert self isa Collection[Object]
63 return new PipeUniq[E](self)
64 end
65
66 # Filter: reject continuous sequences of duplicates
67 #
68 # Important: rely on `==`.
69 #
70 # assert [1,2,1,1,1,3,2].seq_uniq.to_a == [1,2,1,3,2]
71 fun seq_uniq: Collection[E]
72 do
73 return new PipeSeqUniq[E](self)
74 end
75
76 # The view of two combined collections.
77 #
78 # assert ([1..20[ + [20..40[).to_a == ([1..40[).to_a
79 fun +(other: Collection[E]): Collection[E]
80 do
81 return new PipeJoin[E](self, other)
82 end
83
84 # Alternate each item with `e`.
85 #
86 # assert [1,2,3].alternate(0).to_a == [1,0,2,0,3]
87 fun alternate(e: E): Collection[E]
88 do
89 return new PipeAlternate[E](self, e)
90 end
91
92 # Filter: reject a given `item`.
93 #
94 # assert [1,1,2,1,3].skip(1).to_a == [2,3]
95 fun skip(item: E): Collection[E]
96 do
97 return new PipeSkip[E](self, item)
98 end
99
100 # Filter: keep only the first `length` items.
101 #
102 # assert [1,2,3,4,5].head(2).to_a == [1,2]
103 fun head(length: Int): Collection[E]
104 do
105 return new PipeHead[E](self, length)
106 end
107
108 # Filter: reject the first `length` items.
109 #
110 # assert [1,2,3,4,5].skip_head(2).to_a == [3,4,5]
111 #
112 # ENSURE: self == return
113 fun skip_head(length: Int): Collection[E]
114 do
115 return new PipeSkipHead[E](self, length)
116 end
117
118 # Filter: keep only the last `length` items.
119 #
120 # assert [1,2,3,4,5].tail(2).to_a == [4,5]
121 #
122 # Important: require O(length) in memory
123 fun tail(length: Int): Collection[E]
124 do
125 return new PipeTail[E](self, length)
126 end
127
128 # Filter: reject the last `length` items.
129 #
130 # assert [1,2,3,4,5].skip_tail(2).to_a == [1,2,3]
131 #
132 # Important: require O(length) in memory
133 fun skip_tail(length: Int): Collection[E]
134 do
135 return new PipeSkipTail[E](self, length)
136 end
137 end
138
139 ### Specific private collection and iterator classes
140
141 private class PipeUniq[E]
142 super Collection[E]
143 var source: Collection[E]
144 redef fun iterator do return new PipeUniqIterator[E](source.iterator)
145 end
146
147 private class PipeUniqIterator[E]
148 super Iterator[E]
149
150 var source: Iterator[E]
151
152 var seen = new HashSet[Object] # FIXME HashSet[E]
153
154 redef fun is_ok do return source.is_ok
155
156 redef fun item do return source.item
157
158 redef fun next
159 do
160 self.seen.add(self.item.as(Object))
161 source.next
162 while source.is_ok and self.seen.has(source.item.as(Object)) do
163 source.next
164 end
165 end
166 end
167
168 private class PipeSeqUniq[E]
169 super Collection[E]
170 var source: Collection[E]
171 redef fun iterator do return new PipeSeqUniqIterator[E](source.iterator)
172 end
173
174 private class PipeSeqUniqIterator[E]
175 super Iterator[E]
176
177 var source: Iterator[E]
178
179 redef fun is_ok do return source.is_ok
180
181 redef fun item do return source.item
182
183 redef fun next
184 do
185 var seen = self.item
186 source.next
187 while source.is_ok and seen == source.item do
188 source.next
189 end
190 end
191 end
192
193 private class PipeJoin[E]
194 super Collection[E]
195 var source1: Collection[E]
196 var source2: Collection[E]
197 redef fun iterator do return new PipeJoinIterator[E](source1.iterator, source2.iterator)
198 end
199
200 private class PipeJoinIterator[E]
201 super Iterator[E]
202 var source1: Iterator[E]
203 var source2: Iterator[E]
204
205 redef fun is_ok
206 do
207 return source1.is_ok or source2.is_ok
208 end
209
210 redef fun item
211 do
212 if source1.is_ok then return source1.item else return source2.item
213 end
214
215 redef fun next
216 do
217 if source1.is_ok then source1.next else source2.next
218 end
219 end
220
221 private class PipeAlternate[E]
222 super Collection[E]
223 var source: Collection[E]
224 var odd_item: E
225 redef fun iterator do return new PipeAlternateIterator[E](source.iterator, odd_item)
226 end
227
228 private class PipeAlternateIterator[E]
229 super Iterator[E]
230
231 var source: Iterator[E]
232 var odd_item: E
233 var odd = true
234
235 redef fun is_ok do return source.is_ok
236
237 redef fun item
238 do
239 if odd then
240 return source.item
241 else
242 return odd_item
243 end
244 end
245
246 redef fun next
247 do
248 if odd then
249 source.next
250 end
251 odd = not odd
252 end
253 end
254
255 private class PipeSkip[E]
256 super Collection[E]
257 var source: Collection[E]
258 var skip_item: E
259 redef fun iterator do return new PipeSkipIterator[E](source.iterator, skip_item)
260 end
261
262 private class PipeSkipIterator[E]
263 super Collection[E]
264 super Iterator[E]
265
266 var source: Iterator[E]
267 var skip_item: E
268
269 init(source: Iterator[E], skip_item: E)
270 do
271 self.source = source
272 self.skip_item = skip_item
273
274 do_skip
275 end
276
277 fun do_skip
278 do
279 while source.is_ok and source.item == skip_item do source.next
280 end
281
282 redef fun is_ok do return source.is_ok
283
284 redef fun item do return source.item
285
286 redef fun next
287 do
288 source.next
289 do_skip
290 end
291 end
292
293 private class PipeHead[E]
294 super Collection[E]
295 var source: Collection[E]
296 var pipe_length: Int
297 redef fun iterator do return new PipeHeadIterator[E](source.iterator, pipe_length)
298 end
299
300 private class PipeHeadIterator[E]
301 super Iterator[E]
302
303 var source: Iterator[E]
304
305 var length: Int
306
307 redef fun is_ok do return length > 0 and source.is_ok
308
309 redef fun item do return source.item
310
311 redef fun next
312 do
313 length -= 1
314 source.next
315 end
316 end
317
318 private class PipeSkipHead[E]
319 super Collection[E]
320 var source: Collection[E]
321 var pipe_length: Int
322 redef fun iterator
323 do
324 var res = source.iterator
325 var length = pipe_length
326 while length > 0 and res.is_ok do
327 length -= 1
328 res.next
329 end
330 return res
331 end
332 end
333
334 private class PipeTail[E]
335 super Collection[E]
336 var source: Collection[E]
337 var pipe_length: Int
338 redef fun iterator
339 do
340 var lasts = new List[E]
341 var res = source.iterator
342 var length = pipe_length
343 while res.is_ok do
344 while lasts.length >= length do lasts.shift
345 lasts.push(res.item)
346 res.next
347 end
348 return lasts.iterator
349 end
350 end
351
352 private class PipeSkipTail[E]
353 super Collection[E]
354 var source: Collection[E]
355 var pipe_length: Int
356 redef fun iterator do return new PipeSkipTailIterator[E](source.iterator, pipe_length)
357 end
358
359 private class PipeSkipTailIterator[E]
360 super Iterator[E]
361
362 var source: Iterator[E]
363
364 var length: Int
365
366 var lasts = new List[E]
367
368 init(source: Iterator[E], length: Int)
369 do
370 self.source = source
371 self.length = length
372 var lasts = self.lasts
373 while source.is_ok and lasts.length < length do
374 lasts.push(source.item)
375 source.next
376 end
377 end
378
379 redef fun is_ok do return source.is_ok
380
381 redef fun item do return lasts.first
382
383 redef fun next
384 do
385 lasts.shift
386 lasts.push(source.item)
387 source.next
388 end
389 end