lib: new module pipeline
[nit.git] / lib / pipeline.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Pipelined filters and operations on iterators.
16 #
17 # This module enhance `Iterator`s with some methods that enable a
18 # pipeline-like programing that offers the manupulation of
19 # collections trough connected filters with reasonable memory constraints.
20 module pipeline
21
22 redef interface Iterator[E]
23 # Filter: sort with ComparableSorter.
24 # SEE: `sort_with` for details
25 # REQUIRE: self isa Iterator[Comparable]
26 #
27 # [1,3,2].iterator.sort.to_a #=> [1,2,3]
28 fun sort: Iterator[E]
29 do
30 assert self isa Iterator[Comparable]
31 var sorter = new ComparableSorter[Comparable]
32 var a = self.to_a
33 sorter.sort(a)
34 return a.iterator
35 end
36
37 # Filter: sort with a given `sorter`.
38 # Important: require O(n) memory.
39 #
40 # REQUIRE: self isa Iterator[Object]
41 # FIXME: AbstractSorter[E] is refused
42 fun sort_with(sorter: AbstractSorter[Object]): Iterator[E]
43 do
44 assert self isa Iterator[Object]
45 var a = self.to_a
46 sorter.sort(a)
47 return a.iterator
48 end
49
50 # Filter: reject duplicates.
51 # Elements already seen are rejected.
52 #
53 # Important: rely on `==` and `hash`
54 # Important: require O(m) in memory, where m is the total number of uniq items.
55 #
56 # [1,2,1,1,1,3,2].iterator.uniq.to_a #=> [1,2,3]
57 #
58 # REQUIRE: self isa Iterator[Object]
59 fun uniq: Iterator[E]
60 do
61 assert self isa Iterator[Object]
62 return new PipeUniq[E](self)
63 end
64
65 # Filter: reject continuous sequences of duplicates
66 #
67 # Important: rely on `==`.
68 #
69 # [1,2,1,1,1,3,2].iterator.uniq.to_a #=> [1,2,1,3,2]
70 fun seq_uniq: Iterator[E]
71 do
72 return new PipeSeqUniq[E](self)
73 end
74
75 # Combine two iterators.
76 #
77 # When the first iterator is terminated, the second is started.
78 #
79 # ([1,2].iterator + [3,4].iterator).to_a #=> [1,2,3,4]
80 fun +(other: Iterator[E]): Iterator[E]
81 do
82 return new PipeJoin[E](self, other)
83 end
84
85 # Alternate each item with `e`.
86 #
87 # [1,2,3].iterator.alternate(0).to_a #=> [1,0,2,0,3]
88 fun alternate(e: E): Iterator[E]
89 do
90 return new PipeAlternate[E](self, e)
91 end
92
93 # Filter: reject a given `item`.
94 #
95 # [1,1,2,1,3].iterator.skip(1).to_a #=> [2,3]
96 fun skip(item: E): Iterator[E]
97 do
98 return new PipeSkip[E](self, item)
99 end
100
101 # Filter: keep only the first `length` items.
102 #
103 # This filter does not always consume `self'.
104 #
105 # var i = [1,2,3,4,5].iterator
106 # i.head(2).to_a #=> [1,2]
107 # i.to_a #=> [3,4,5]
108 fun head(length: Int): Iterator[E]
109 do
110 return new PipeHead[E](self, length)
111 end
112
113 # Filter: reject the first `length` items.
114 #
115 # [1,2,3,4,5].iterator.skip_head(2).to_a #=> [3,4,5]
116 #
117 # ENSURE: self == return
118 fun skip_head(length: Int): Iterator[E]
119 do
120 while length > 0 and self.is_ok do
121 length -= 1
122 self.next
123 end
124 return self
125 end
126
127 # Filter: keep only the last `length` items.
128 #
129 # [1,2,3,4,5].iterator.tail(2).to_a #=> [4,5]
130 #
131 # Important: require O(length) in memory
132 fun tail(length: Int): Iterator[E]
133 do
134 var lasts = new List[E]
135 while self.is_ok do
136 while lasts.length >= length do lasts.shift
137 lasts.push(self.item)
138 self.next
139 end
140 return lasts.iterator
141 end
142
143 # Filter: reject the last `length` items.
144 #
145 # [1,2,3,4,5].iterator.skip_tail(2).to_a #=> [1,2,3]
146 #
147 # Important: require O(length) in memory
148 fun skip_tail(length: Int): Iterator[E]
149 do
150 return new PipeSkipTail[E](self, length)
151 end
152 end
153
154 ### Specific private iterator classes
155
156 private class PipeUniq[E]
157 super Iterator[E]
158
159 var source: Iterator[E]
160
161 var seen = new HashSet[Object] # FIXME HashSet[E]
162
163 redef fun is_ok do return source.is_ok
164
165 redef fun item do return source.item
166
167 redef fun next
168 do
169 self.seen.add(self.item.as(Object))
170 source.next
171 while source.is_ok and self.seen.has(source.item.as(Object)) do
172 source.next
173 end
174 end
175 end
176
177 private class PipeSeqUniq[E]
178 super Iterator[E]
179
180 var source: Iterator[E]
181
182 redef fun is_ok do return source.is_ok
183
184 redef fun item do return source.item
185
186 redef fun next
187 do
188 var seen = self.item
189 source.next
190 while source.is_ok and seen == source.item do
191 source.next
192 end
193 end
194 end
195
196 private class PipeJoin[E]
197 super Iterator[E]
198 var source1: Iterator[E]
199 var source2: Iterator[E]
200
201 redef fun is_ok
202 do
203 return source1.is_ok or source2.is_ok
204 end
205
206 redef fun item
207 do
208 if source1.is_ok then return source1.item else return source2.item
209 end
210
211 redef fun next
212 do
213 if source1.is_ok then source1.next else source2.next
214 end
215 end
216
217 private class PipeAlternate[E]
218 super Iterator[E]
219
220 var source: Iterator[E]
221 var odd_item: E
222 var odd = true
223
224 redef fun is_ok do return source.is_ok
225
226 redef fun item
227 do
228 if odd then
229 return source.item
230 else
231 return odd_item
232 end
233 end
234
235 redef fun next
236 do
237 if odd then
238 source.next
239 end
240 odd = not odd
241 end
242 end
243
244 private class PipeSkip[E]
245 super Iterator[E]
246
247 var source: Iterator[E]
248 var skip_item: E
249
250 init(source: Iterator[E], skip_item: E)
251 do
252 self.source = source
253 self.skip_item = skip_item
254
255 do_skip
256 end
257
258 fun do_skip
259 do
260 while source.is_ok and source.item == skip_item do source.next
261 end
262
263 redef fun is_ok do return source.is_ok
264
265 redef fun item do return source.item
266
267 redef fun next
268 do
269 source.next
270 do_skip
271 end
272 end
273
274 private class PipeHead[E]
275 super Iterator[E]
276
277 var source: Iterator[E]
278
279 var length: Int
280
281 redef fun is_ok do return length > 0 and source.is_ok
282
283 redef fun item do return source.item
284
285 redef fun next
286 do
287 length -= 1
288 source.next
289 end
290 end
291
292 private class PipeSkipTail[E]
293 super Iterator[E]
294
295 var source: Iterator[E]
296
297 var length: Int
298
299 var lasts = new List[E]
300
301 init(source: Iterator[E], length: Int)
302 do
303 self.source = source
304 self.length = length
305 var lasts = self.lasts
306 while source.is_ok and lasts.length < length do
307 lasts.push(source.item)
308 source.next
309 end
310 end
311
312 redef fun is_ok do return source.is_ok
313
314 redef fun item do return lasts.first
315
316 redef fun next
317 do
318 lasts.shift
319 lasts.push(source.item)
320 source.next
321 end
322 end