lib/standard/ropes: Added a final forward iterator on the chars.
[nit.git] / lib / standard / ropes.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it if you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or as a part of
9 # another product.
10
11 # Nit implementation of the Ropes (see Ropes : An Alternative to Strings,
12 # SOFTWARE - PRACTICE AND EXPERIENCE, VOL. 25(12), 1315 - 1330 (DECEMBER 1995)
13 # Hans-J. Boehm, Russ Atkinson and Michael Plass)
14 #
15 # A rope is a kind of string but instead of being flat, it relies on a binary tree structure to store data.
16 module ropes
17
18 intrude import string
19
# Result of looking up a position in a Rope.
#
# Bundles the Leaf that contains the position, the offset of the position
# inside that Leaf, and the Concat nodes crossed on the way down.
private class Path
	# Leaf containing the searched position
	var leaf: Leaf
	# Offset of the searched position, counted from the start of `leaf`
	var offset: Int
	# Concat nodes traversed during the descent, in traversal order,
	# each one recording which child was followed
	var stack: List[PathElement]
end
31
# One step of a Path: a traversed Concat node together with the
# direction (left or right child) that was followed from it.
private class PathElement
	# Concat node that was traversed
	var node: Concat
	# True when the descent continued into the left child
	var left: Bool = false
	# True when the descent continued into the right child
	var right: Bool = false
end
42
# Common ancestor of every node stored in a Rope
private abstract class RopeNode
	# Number of characters covered by this node, 0 until something is attached
	var length: Int = 0
end
48
# Inner node of a Rope: the concatenation of up to two children (of any RopeNode type)
private class Concat
	super RopeNode

	# Left child, `null` as long as none was attached
	var _left: nullable RopeNode = null
	# Right child, `null` as long as none was attached
	var _right: nullable RopeNode = null

	# Left child of the node, if any
	fun left: nullable RopeNode do return _left

	# Right child of the node, if any
	fun right: nullable RopeNode do return _right

	# Attaches `l` as left child and refreshes `length` from both children
	fun left=(l: RopeNode)
	do
		_left = l
		var total = l.length
		var sibling = _right
		if sibling != null then total += sibling.length
		length = total
	end

	# Attaches `r` as right child and refreshes `length` from both children
	fun right=(r: RopeNode)
	do
		_right = r
		var total = r.length
		var sibling = _left
		if sibling != null then total += sibling.length
		length = total
	end
end
75
# Terminal node of a Rope, wrapping a FlatString
private class Leaf
	super RopeNode

	# FlatString stored in this leaf
	var str: FlatString

	# Builds a leaf around `val`; the length of the node is the length of `val`
	init(val: FlatString)
	do
		str = val
		length = val.length
	end

end
89
# Basic structure, binary tree with a root node.
#
# Also hosts the services shared by the concrete rope implementations.
abstract class Rope
	super Text

	# Root node, entry point of the tree
	private var root: RopeNode

	# Builds an empty Rope
	init do from("")

	# Builds a Rope holding the content of `s`
	#
	# A RopeString shares its root with the new Rope; any other String is
	# cast to FlatString and wrapped in a single Leaf.
	init from(s: String) do
		if s isa RopeString then
			root = s.root
		else
			root = new Leaf(s.as(FlatString))
		end
	end

	# Builds a Rope directly on top of the node `r`
	private init from_root(r: RopeNode)
	do
		root = r
	end

	# Number of characters, as recorded by the root node
	redef fun length do return root.length

	# Iterator on the nodes of the rope, in forward postfix order, starting at position `from`
	private fun postfix(from: Int): Postfix
	do
		return new Postfix.from(self, from)
	end

	# Iterator on the leaves of the rope, in forward order, starting at position `from`
	private fun leaves(from: Int): LeavesIterator
	do
		return new LeavesIterator(self, from)
	end

	# Iterator on the substrings of the rope, in forward order, starting at position 0
	fun substrings: IndexedIterator[Text]
	do
		return new SubstringsIterator(self, 0)
	end

	# Iterator on the substrings of the rope, in forward order, starting at position `from`
	fun substrings_from(from: Int): IndexedIterator[Text]
	do
		return new SubstringsIterator(self, from)
	end

	# Path from the root to the Leaf containing `position`
	#
	# `position` must be a valid index: `0 <= position < length`.
	private fun node_at(position: Int): Path
	do
		assert position >= 0 and position < length
		var visited = new List[PathElement]
		return get_node_from(root, 0, position, visited)
	end

	# Builds the path to the Leaf containing `seek_pos`
	#
	# `node` is the subtree being explored and `curr_pos` the position of its
	# first character in the whole rope. Every Concat crossed is appended to
	# `stack` with the direction that was followed.
	private fun get_node_from(node: RopeNode, curr_pos: Int, seek_pos: Int, stack: List[PathElement]): Path
	do
		assert curr_pos >= 0
		if node isa Leaf then return new Path(node, seek_pos - curr_pos, stack)

		var cct = node.as(Concat)
		var step = new PathElement(cct)
		stack.add(step)

		var lft = cct.left
		if lft == null then
			# No left child: the position can only be in the right subtree
			step.right = true
			return get_node_from(cct.right.as(not null), curr_pos, seek_pos, stack)
		end

		# Position of the first character located after the left subtree
		var boundary = curr_pos + lft.length
		if boundary > seek_pos then
			step.left = true
			return get_node_from(lft, curr_pos, seek_pos, stack)
		end

		step.right = true
		return get_node_from(cct.right.as(not null), boundary, seek_pos, stack)
	end

end
158
# Rope that cannot be modified
#
# Every operation returns a new RopeString; nodes that are not affected by
# the operation are shared with the receiver.
class RopeString
	super Rope
	super String

	redef fun to_s do return self

	# Node holding the content of `s`
	#
	# A FlatString is wrapped in a fresh Leaf; any other String is cast to
	# RopeString and its root is shared.
	private fun node_for(s: String): RopeNode
	do
		if s isa FlatString then return new Leaf(s)
		return s.as(RopeString).root
	end

	# Inserts a String `str` at position `pos`
	#
	# Returns a rope where the characters located before `pos` are followed
	# by `str`, then by the characters located from `pos` on.
	# `pos` must satisfy `0 <= pos <= length`, unless `str` or `self` is empty.
	fun insert_at(str: String, pos: Int): RopeString
	do
		if str.length == 0 then return self
		if self.length == 0 then return new RopeString.from(str)

		assert pos >= 0 and pos <= length

		if pos == length then return append(str).as(RopeString)

		var path = node_at(pos)
		var inserted = node_for(str)

		# Subtree that takes the place of the leaf found at `pos`
		var last_concat = new Concat

		if path.offset == 0 then
			# Insertion right before the leaf
			last_concat.left = inserted
			last_concat.right = path.leaf
		else if path.offset == path.leaf.length then
			# Insertion right after the leaf
			last_concat.left = path.leaf
			last_concat.right = inserted
		else
			# Insertion inside the leaf: `path.offset` is counted from the
			# start of the leaf, so this is where the leaf must be cut
			var s = path.leaf.str
			var l_half = s.substring(0, path.offset)
			var r_half = s.substring_from(path.offset)
			var cct = new Concat
			cct.right = new Leaf(r_half)
			last_concat.left = new Leaf(l_half)
			last_concat.right = inserted
			cct.left = last_concat
			last_concat = cct
		end

		# Rebuild the ancestors bottom-up, sharing the untouched siblings.
		# A Concat may lack a child, so siblings are only attached when present.
		for i in path.stack.reverse_iterator do
			var nod = new Concat
			if i.left then
				var sibling = i.node.right
				if sibling != null then nod.right = sibling
				nod.left = last_concat
			else
				var sibling = i.node.left
				if sibling != null then nod.left = sibling
				nod.right = last_concat
			end
			last_concat = nod
		end

		return new RopeString.from_root(last_concat)
	end

	# Adds `s` at the end of self
	#
	# When one of the two operands is empty, the other one is returned as is,
	# so that no empty leaf is ever grafted into the tree.
	fun append(s: String): String
	do
		if self.is_empty then return s
		if s.is_empty then return self
		return new RopeString.from_root(append_to_path(root, s))
	end

	# Builds a new path from `node` to its rightmost node, with `s` appended
	#
	# Only the nodes on the rightmost path are recreated; left subtrees are shared.
	private fun append_to_path(node: RopeNode, s: String): RopeNode
	do
		var cct = new Concat
		if node isa Leaf then
			cct.left = node
			cct.right = node_for(s)
		else if node isa Concat then
			var right = node.right
			if node.left != null then cct.left = node.left.as(not null)
			if right == null then
				# Free slot on the right: `s` goes there
				cct.right = node_for(s)
			else
				cct.right = append_to_path(right, s)
			end
		end
		return cct
	end

	# Sub-rope of `len` characters starting at `pos`
	#
	# A negative `pos` is clamped to 0 (shortening `len` accordingly) and the
	# window is cut at the end of the rope.
	#
	# Documented by its author as O(log(n)).
	# NOTE(review): presumably this assumes a balanced tree; nothing here rebalances it.
	#
	#     var rope = new RopeString.from("abcd")
	#     assert rope.substring(1, 2)	== "bc"
	#     assert rope.substring(-1, 2)	== "a"
	#     assert rope.substring(1, 0)	== ""
	#     assert rope.substring(2, 5)	== "cd"
	#
	redef fun substring(pos, len)
	do
		if pos < 0 then
			len += pos
			pos = 0
		end

		if pos + len > length then len = length - pos

		if len <= 0 then return new RopeString.from("")

		# First pass: get rid of everything located before `pos`
		var path = node_at(pos)

		var lf = path.leaf
		var offset = path.offset

		if path.leaf.str.length - offset > len then lf = new Leaf(lf.str.substring(offset,len)) else lf = new Leaf(lf.str.substring_from(offset))

		var nod: RopeNode = lf

		for i in path.stack.reverse_iterator do
			# The path went right: the whole left sibling is before `pos`, drop it
			if i.right then continue
			var tmp = new Concat
			tmp.left = nod
			var r = i.node.right
			if r != null then tmp.right = r
			nod = tmp
		end

		var ret = new RopeString
		ret.root = nod

		# Second pass: in the left-trimmed rope, get rid of everything
		# located after the last wanted character (index `len - 1`)
		path = ret.node_at(len-1)

		offset = path.offset
		nod = new Leaf(path.leaf.str.substring(0, offset+1))

		for i in path.stack.reverse_iterator do
			# The path went left: the whole right sibling is after the end, drop it
			if i.left then continue
			var tmp = new Concat
			tmp.right = nod
			var l = i.node.left
			if l != null then tmp.left = l
			nod = tmp
		end

		ret.root = nod

		return ret
	end
end
298
# Bookkeeping record used while iterating on the nodes of a Rope
private class IteratorElement

	# Wraps the node `e`
	#
	# A Leaf has no children, so both sides are flagged as visited from the start.
	init(e: RopeNode)
	do
		node = e
		var is_leaf = e isa Leaf
		left = is_leaf
		right = is_leaf
	end

	# The node being visited
	var node: RopeNode
	# Was the left side of the node dealt with ?
	var left: Bool = false
	# Was the right side of the node dealt with ?
	var right: Bool = false
	# Was the node itself visited ?
	var done: Bool = false
end
320
# Simple postfix iterator on the nodes of a Rope
#
# Children are visited before their parent; `index` follows the position of
# the leaves in the target Rope.
private class Postfix
	super IndexedIterator[RopeNode]

	# Target Rope to iterate on
	var target: Rope

	# Current position in Rope
	var pos: Int

	# Nodes being visited, the current one is the last
	var stack = new List[IteratorElement]

	# Starts the iteration on the Leaf of `tgt` that contains `pos`
	#
	# When `pos` is out of bounds the iterator is created already finished.
	init from(tgt: Rope, pos: Int)
	do
		target = tgt
		self.pos = pos
		var out_of_range = pos < 0 or pos >= tgt.length
		if out_of_range then return

		var path = tgt.node_at(pos)
		# Align the position on the first character of the starting leaf
		self.pos = pos - path.offset

		for step in path.stack do
			var frame = new IteratorElement(step.node)
			# The left side is either the branch being followed or located
			# before the starting leaf: never descend into it again
			frame.left = true
			frame.right = step.right
			stack.push(frame)
		end

		# The starting leaf is the first item of the iteration
		var leaf_frame = new IteratorElement(path.leaf)
		leaf_frame.done = true
		stack.push(leaf_frame)
	end

	redef fun item
	do
		assert is_ok
		return stack.last.node
	end

	redef fun is_ok do return not stack.is_empty

	redef fun index do return pos

	# Moves to the next node in postfix order
	#
	# The iteration stops as soon as the position goes past the last character
	# of the target, even if some Concat nodes were not yielded yet.
	redef fun next do
		while not stack.is_empty do
			if pos >= target.length then
				stack.clear
				return
			end

			var top = stack.last

			if top.done then
				# Leaving a node that was already yielded; only leaves move the position
				if top.node isa Leaf then pos += top.node.length
				stack.pop
				continue
			end

			var nod = top.node

			if not top.left then
				top.left = true
				if nod isa Concat then
					var child = nod.left
					if child != null then
						stack.push(new IteratorElement(child))
						continue
					end
				end
			end

			if not top.right then
				top.right = true
				if nod isa Concat then
					var child = nod.right
					if child != null then
						stack.push(new IteratorElement(child))
						continue
					end
				end
			end

			# Both sides are dealt with: the node itself is the new current item
			top.done = true
			return
		end
	end
end
398
# Iterates on the leaves (substrings) of the Rope
#
# Thin filter on top of a Postfix iterator that only stops on Leaf nodes.
class LeavesIterator
	super IndexedIterator[Leaf]

	# Underlying iterator on all the nodes
	private var nodes: Postfix

	# Starts on the leaf of `tgt` that contains the position `pos`
	init(tgt: Rope, pos: Int)
	do
		nodes = tgt.postfix(pos)
	end

	redef fun is_ok do return nodes.is_ok

	redef fun item
	do
		assert is_ok
		return nodes.item.as(Leaf)
	end

	redef fun index do return nodes.index

	# Advances the underlying iterator until it reaches a Leaf or ends
	redef fun next
	do
		if not nodes.is_ok then return
		nodes.next
		while nodes.is_ok and not (nodes.item isa Leaf) do
			nodes.next
		end
	end
end
428
# Iterates on the leaves and yields the text of each of them
#
# The first substring is cut so that it starts at the requested position.
class SubstringsIterator
	super IndexedIterator[Text]

	# Underlying iterator on the leaves
	private var nodes: IndexedIterator[Leaf]

	# Current position in Rope
	var pos: Int

	# Current substring, computed from the current Leaf and indexes
	var substring: Text

	# Starts on the substring of `tgt` that contains the position `pos`
	#
	# When `pos` is out of bounds no substring is computed.
	init(tgt: Rope, pos: Int)
	do
		nodes = tgt.leaves(pos)
		self.pos = pos
		if pos >= 0 and pos < tgt.length then make_substring
	end

	# Computes `substring` from the current leaf
	#
	# The characters of the leaf located before `pos` are skipped.
	private fun make_substring
	do
		var whole = nodes.item.str
		var leaf_start = nodes.index
		var skip = 0
		if leaf_start < pos then skip = pos - leaf_start
		substring = whole.substring(skip, whole.length)
	end

	redef fun is_ok do return nodes.is_ok

	redef fun item
	do
		assert is_ok
		return substring
	end

	redef fun index do return pos

	# Moves past the current substring and loads the next one, if any
	redef fun next
	do
		pos += substring.length
		nodes.next
		if not nodes.is_ok then return
		make_substring
	end

end
479
# Forward iterator on the characters of a Rope
#
# Goes through the substrings of the Rope one after the other and delegates
# to the character iterator of the current substring.
class RopeCharIterator
	super IndexedIterator[Char]

	# Iterator on the substrings of the target, positioned on the current one
	var substrings: IndexedIterator[Text]

	# Index, in the target Rope, of the current character
	var pos: Int

	# Index of the last character of the target Rope
	var max: Int

	# Character iterator on the current substring
	#
	# Left unset when the iterator is created already finished.
	var substr_iter: IndexedIterator[Char]

	# Starts the iteration on `tgt` at position `from`
	#
	# When no substring is available at `from`, the iterator is created
	# already finished (`is_ok` is false).
	init(tgt: Rope, from: Int)
	do
		substrings = tgt.substrings_from(from)
		max = tgt.length - 1
		if not substrings.is_ok then
			pos = tgt.length
			return
		end
		pos = from
		substr_iter = substrings.item.chars.iterator
	end

	redef fun item
	do
		assert is_ok
		return substr_iter.item
	end

	redef fun is_ok do return pos <= max

	redef fun index do return pos

	# Moves to the next character, switching to the following substring when
	# the current one is exhausted
	#
	# Does nothing once the iteration is over.
	redef fun next
	do
		# Also protects `substr_iter`, which is unset on an iterator created finished
		if not is_ok then return
		pos += 1
		if substr_iter.is_ok then
			substr_iter.next
		end
		if substr_iter.is_ok then return
		# Skip empty substrings so that `item` is always readable while `is_ok`
		substrings.next
		while substrings.is_ok do
			substr_iter = substrings.item.chars.iterator
			if substr_iter.is_ok then return
			substrings.next
		end
	end
end
523