lib: modify lib/pipeline to work on iterators instead of collection
[nit.git] / lib / pipeline.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Pipelined filters and operations on iterators.
16 #
17 # This module enhance `Iterator`s with some methods that enable a
18 # pipeline-like programing that offers the manupulation of
19 # collections trough connected filters with reasonable memory constraints.
20 module pipeline
21
22 redef interface Iterator[E]
23 # Filter: sort with `default_comparator`.
24 # SEE: `sort_with` for details
25 # REQUIRE: self isa Iterator[Comparable]
26 #
27 # assert [1,3,2].iterator.sort.to_a == [1,2,3]
28 fun sort: Iterator[E]
29 do
30 assert self isa Iterator[Comparable]
31 var a = self.to_a
32 default_comparator.sort(a)
33 return a.iterator
34 end
35
36 # Filter: sort with a given `comparator`.
37 # Important: require O(n) memory.
38 fun sort_with(comparator: Comparator[E]): Iterator[E]
39 do
40 var a = self.to_a
41 comparator.sort(a)
42 return a.iterator
43 end
44
45 # Filter: reject duplicates.
46 # Elements already seen are rejected.
47 #
48 # Important: rely on `==` and `hash`
49 # Important: require O(m) in memory, where m is the total number of uniq items.
50 #
51 # assert [1,2,1,1,1,3,2].iterator.uniq.to_a == [1,2,3]
52 #
53 # REQUIRE: self isa Iterator[Object]
54 fun uniq: Iterator[E]
55 do
56 assert self isa Iterator[Object]
57 return new PipeUniq[E](self)
58 end
59
60 # Filter: reject continuous sequences of duplicates
61 #
62 # Important: rely on `==`.
63 #
64 # assert [1,2,1,1,1,3,2].iterator.seq_uniq.to_a == [1,2,1,3,2]
65 fun seq_uniq: Iterator[E]
66 do
67 return new PipeSeqUniq[E](self)
68 end
69
70 # Combine two iterators.
71 #
72 # When the first iterator is terminated, the second is started.
73 #
74 # assert ([1..20[.iterator + [20..40[.iterator).to_a == ([1..40[).to_a
75 fun +(other: Iterator[E]): Iterator[E]
76 do
77 return new PipeJoin[E](self, other)
78 end
79
80 # Alternate each item with `e`.
81 #
82 # assert [1,2,3].iterator.alternate(0).to_a == [1,0,2,0,3]
83 fun alternate(e: E): Iterator[E]
84 do
85 return new PipeAlternate[E](self, e)
86 end
87
88 # Filter: reject a given `item`.
89 #
90 # assert [1,1,2,1,3].iterator.skip(1).to_a == [2,3]
91 fun skip(item: E): Iterator[E]
92 do
93 return new PipeSkip[E](self, item)
94 end
95
96 # Filter: keep only the first `length` items.
97 #
98 # This filter does not always consume `self'.
99 #
100 # var i = [1,2,3,4,5].iterator
101 # assert i.head(2).to_a == [1,2]
102 # i.to_a == [3,4,5]
103 fun head(length: Int): Iterator[E]
104 do
105 return new PipeHead[E](self, length)
106 end
107
108 # Filter: reject the first `length` items.
109 #
110 # assert [1,2,3,4,5].iterator.skip_head(2).to_a == [3,4,5]
111 #
112 # ENSURE: self == return
113 fun skip_head(length: Int): Iterator[E]
114 do
115 while length > 0 and self.is_ok do
116 length -= 1
117 self.next
118 end
119 return self
120 end
121
122 # Filter: keep only the last `length` items.
123 #
124 # assert [1,2,3,4,5].iterator.tail(2).to_a == [4,5]
125 #
126 # Important: require O(length) in memory
127 fun tail(length: Int): Iterator[E]
128 do
129 var lasts = new List[E]
130 while self.is_ok do
131 while lasts.length >= length do lasts.shift
132 lasts.push(self.item)
133 self.next
134 end
135 return lasts.iterator
136 end
137
138 # Filter: reject the last `length` items.
139 #
140 # assert [1,2,3,4,5].iterator.skip_tail(2).to_a == [1,2,3]
141 #
142 # Important: require O(length) in memory
143 fun skip_tail(length: Int): Iterator[E]
144 do
145 return new PipeSkipTail[E](self, length)
146 end
147 end
148
149 ### Specific private iterator classes
150
151 private class PipeUniq[E]
152 super Iterator[E]
153
154 var source: Iterator[E]
155
156 var seen = new HashSet[Object] # FIXME HashSet[E]
157
158 redef fun is_ok do return source.is_ok
159
160 redef fun item do return source.item
161
162 redef fun next
163 do
164 self.seen.add(self.item.as(Object))
165 source.next
166 while source.is_ok and self.seen.has(source.item.as(Object)) do
167 source.next
168 end
169 end
170 end
171
172 private class PipeSeqUniq[E]
173 super Iterator[E]
174
175 var source: Iterator[E]
176
177 redef fun is_ok do return source.is_ok
178
179 redef fun item do return source.item
180
181 redef fun next
182 do
183 var seen = self.item
184 source.next
185 while source.is_ok and seen == source.item do
186 source.next
187 end
188 end
189 end
190
191 private class PipeJoin[E]
192 super Iterator[E]
193 var source1: Iterator[E]
194 var source2: Iterator[E]
195
196 redef fun is_ok
197 do
198 return source1.is_ok or source2.is_ok
199 end
200
201 redef fun item
202 do
203 if source1.is_ok then return source1.item else return source2.item
204 end
205
206 redef fun next
207 do
208 if source1.is_ok then source1.next else source2.next
209 end
210 end
211
212 private class PipeAlternate[E]
213 super Iterator[E]
214
215 var source: Iterator[E]
216 var odd_item: E
217 var odd = true
218
219 redef fun is_ok do return source.is_ok
220
221 redef fun item
222 do
223 if odd then
224 return source.item
225 else
226 return odd_item
227 end
228 end
229
230 redef fun next
231 do
232 if odd then
233 source.next
234 end
235 odd = not odd
236 end
237 end
238
239 private class PipeSkip[E]
240 super Iterator[E]
241
242 var source: Iterator[E]
243 var skip_item: E
244
245 init(source: Iterator[E], skip_item: E)
246 do
247 self.source = source
248 self.skip_item = skip_item
249
250 do_skip
251 end
252
253 fun do_skip
254 do
255 while source.is_ok and source.item == skip_item do source.next
256 end
257
258 redef fun is_ok do return source.is_ok
259
260 redef fun item do return source.item
261
262 redef fun next
263 do
264 source.next
265 do_skip
266 end
267 end
268
269 private class PipeHead[E]
270 super Iterator[E]
271
272 var source: Iterator[E]
273
274 var length: Int
275
276 redef fun is_ok do return length > 0 and source.is_ok
277
278 redef fun item do return source.item
279
280 redef fun next
281 do
282 length -= 1
283 source.next
284 end
285 end
286
287 private class PipeSkipTail[E]
288 super Iterator[E]
289
290 var source: Iterator[E]
291
292 var length: Int
293
294 var lasts = new List[E]
295
296 init(source: Iterator[E], length: Int)
297 do
298 self.source = source
299 self.length = length
300 var lasts = self.lasts
301 while source.is_ok and lasts.length < length do
302 lasts.push(source.item)
303 source.next
304 end
305 end
306
307 redef fun is_ok do return source.is_ok
308
309 redef fun item do return lasts.first
310
311 redef fun next
312 do
313 lasts.shift
314 lasts.push(source.item)
315 source.next
316 end
317 end