lib/standard/ropes: Added a forward postfix iterator on Rope.
[nit.git] / lib / standard / ropes.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it if you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or as a part of
9 # another product.
10
11 # Nit implementation of the Ropes (see Ropes : An Alternative to Strings,
12 # SOFTWARE - PRACTICE AND EXPERIENCE, VOL. 25(12), 1315 - 1330 (DECEMBER 1995)
13 # Hans. J Boehm, Russ Atkinson and Michael Plass)
14 #
15 # A rope is a kind of string but instead of being flat, it relies on a binary tree structure to store data.
16 module ropes
17
18 intrude import string
19
# Result of the search for a position in a rope.
#
# Holds the leaf containing the sought position, the offset of that
# position inside the leaf, and the path followed from the root of the
# rope to reach that leaf.
private class Path
	# Leaf found
	var leaf: Leaf
	# Offset of the sought position in `leaf`, counted from the start of the leaf
	var offset: Int
	# Concat nodes traversed (root first), each one recording which child was taken
	var stack: List[PathElement]
end
31
# One step of a `Path`: a traversed Concat node along with the
# direction (left or right child) taken from it.
private class PathElement
	# Concat node that was traversed
	var node: Concat
	# True if the search went down the left child of `node`
	var left: Bool = false
	# True if the search went down the right child of `node`
	var right: Bool = false
end
42
# Common ancestor of every node stored in a Rope
private abstract class RopeNode
	# Number of characters covered by this node (0 until set otherwise)
	var length: Int = 0
end
48
# Inner node of a rope: the concatenation of two child nodes (of any RopeNode type)
#
# Either child may be missing (null). `length` is recomputed each time a
# child is set, as the sum of the lengths of the children that are present.
private class Concat
	super RopeNode

	# Left child of the node
	var _left: nullable RopeNode = null
	# Right child of the node
	var _right: nullable RopeNode = null

	# Left child, or null when none was set
	fun left: nullable RopeNode do return _left

	# Right child, or null when none was set
	fun right: nullable RopeNode do return _right

	# Sets the left child to `l` and refreshes `length`
	fun left=(l: RopeNode)
	do
		var total = l.length
		var sibling = _right
		if sibling != null then total += sibling.length
		_left = l
		length = total
	end

	# Sets the right child to `r` and refreshes `length`
	fun right=(r: RopeNode)
	do
		var total = r.length
		var sibling = _left
		if sibling != null then total += sibling.length
		_right = r
		length = total
	end
end
75
# Terminal node of a Rope, wraps a FlatString
private class Leaf
	super RopeNode

	# FlatString held by this leaf
	var str: FlatString

	# Builds a leaf around `val`; the length of the node is the length of `val`
	init(val: FlatString)
	do
		self.str = val
		length = val.length
	end
end
89
# Basic structure, binary tree with a root node.
#
# Also hosts the services shared by the concrete rope implementations.
abstract class Rope
	super Text

	# Root node, entry point of a Rope.
	private var root: RopeNode

	# Empty Rope
	init do from("")

	# Creates a new Rope with `s` as root
	#
	# The root of `s` is reused when `s` is already a RopeString,
	# otherwise `s` (expected to be a FlatString) is wrapped in a new Leaf.
	init from(s: String)
	do
		if s isa RopeString then
			root = s.root
		else
			root = new Leaf(s.as(FlatString))
		end
	end

	# Creates a new Rope directly on top of the node `r`
	private init from_root(r: RopeNode)
	do
		root = r
	end

	redef fun length do return root.length

	# Iterator on the nodes of the rope, in forward postfix order
	private fun postfix(from: Int): Postfix do return new Postfix.from(self, from)

	# Path to the Leaf for `position`
	#
	# `position` must be a valid index of the rope (`0 <= position < length`).
	private fun node_at(position: Int): Path
	do
		assert position >= 0 and position < length
		var visited = new List[PathElement]
		return get_node_from(root, 0, position, visited)
	end

	# Builds the path to Leaf at position `seek_pos`
	#
	# `node` is the node the search starts from and `curr_pos` the position
	# (in the rope) of its first character. Each Concat traversed is appended
	# to `stack`, flagged with the direction taken.
	private fun get_node_from(node: RopeNode, curr_pos: Int, seek_pos: Int, stack: List[PathElement]): Path
	do
		var current = node
		var base = curr_pos
		loop
			assert base >= 0
			if current isa Leaf then return new Path(current, seek_pos - base, stack)
			var concat = current.as(Concat)
			var step = new PathElement(concat)
			stack.add(step)
			var l = concat.left
			if l != null and base + l.length > seek_pos then
				# The sought position lies within the left subtree
				step.left = true
				current = l
			else
				# Skip the left subtree (if any) and carry on to the right
				step.right = true
				if l != null then base += l.length
				current = concat.right.as(not null)
			end
		end
	end

end
149
# Rope that cannot be modified
#
# Services that "change" the rope (`insert_at`, `append`, `substring`)
# build and return a new rope; the nodes reachable from `self` are
# reused where possible but never altered.
class RopeString
	super Rope
	super String

	redef fun to_s do return self

	# Node representing `s`: a new Leaf for a FlatString,
	# the existing root when `s` is a RopeString.
	private fun node_for(s: String): RopeNode
	do
		if s isa FlatString then return new Leaf(s)
		return s.as(RopeString).root
	end

	# Inserts a String `str` at position `pos`
	#
	# Returns `self` when `str` is empty, and a rope built on `str` alone
	# when `self` is empty. `pos` must satisfy `0 <= pos <= length`.
	fun insert_at(str: String, pos: Int): RopeString
	do
		if str.length == 0 then return self
		if self.length == 0 then return new RopeString.from(str)

		assert pos >= 0 and pos <= length

		if pos == length then return append(str).as(RopeString)

		var path = node_at(pos)
		var offset = path.offset
		var inserted = node_for(str)

		var last_concat = new Concat

		if offset == 0 then
			# Insertion right before the leaf
			last_concat.left = inserted
			last_concat.right = path.leaf
		else if offset == path.leaf.length then
			# Insertion right after the leaf (kept as a defensive case)
			last_concat.left = path.leaf
			last_concat.right = inserted
		else
			# Insertion inside the leaf: `offset` is counted from the start of
			# the leaf, so the leaf is split into [0, offset[ and [offset, end[
			var s = path.leaf.str
			var l_half = s.substring(0, offset)
			var r_half = s.substring_from(offset)
			last_concat.left = new Leaf(l_half)
			last_concat.right = inserted
			var cct = new Concat
			cct.left = last_concat
			cct.right = new Leaf(r_half)
			last_concat = cct
		end

		# Rebuild the path from the leaf up to the root, sharing the
		# untouched sibling of each traversed node when there is one.
		for i in path.stack.reverse_iterator do
			var nod = new Concat
			if i.left then
				nod.left = last_concat
				var r = i.node.right
				if r != null then nod.right = r
			else
				var l = i.node.left
				if l != null then nod.left = l
				nod.right = last_concat
			end
			last_concat = nod
		end

		return new RopeString.from_root(last_concat)
	end

	# Adds `s` at the end of self
	#
	# Returns `s` itself when `self` is empty.
	fun append(s: String): String
	do
		if self.is_empty then return s
		return new RopeString.from_root(append_to_path(root,s))
	end

	# Builds a new path from root to the rightmost node with s appended
	#
	# The nodes of the right spine are copied, left children are shared.
	private fun append_to_path(node: RopeNode, s: String): RopeNode
	do
		var cct = new Concat
		if node isa Leaf then
			cct.left = node
			cct.right = node_for(s)
		else if node isa Concat then
			var l = node.left
			if l != null then cct.left = l
			var r = node.right
			if r == null then
				cct.right = node_for(s)
			else
				cct.right = append_to_path(r, s)
			end
		end
		return cct
	end

	# Sub-rope of `len` characters starting at `pos`
	#
	# A negative `pos` is clamped to 0 (shortening `len` accordingly) and the
	# range is truncated at the end of the rope. An empty rope is returned
	# when nothing is left to extract.
	#
	# Documented as O(log(n)) by the original author (not re-verified here).
	#
	#     var rope = new RopeString.from("abcd")
	#     assert rope.substring(1, 2)	==  "bc"
	#     assert rope.substring(-1, 2)	==  "a"
	#     assert rope.substring(1, 0)	==  ""
	#     assert rope.substring(2, 5)	==  "cd"
	#
	redef fun substring(pos, len)
	do
		if pos < 0 then
			len += pos
			pos = 0
		end

		if pos + len > length then len = length - pos

		if len <= 0 then return new RopeString.from("")

		# First pass: drop everything located before `pos`
		var path = node_at(pos)

		var lf = path.leaf
		var offset = path.offset

		if path.leaf.str.length - offset > len then lf = new Leaf(lf.str.substring(offset,len)) else lf = new Leaf(lf.str.substring_from(offset))

		var nod: RopeNode = lf

		for i in path.stack.reverse_iterator do
			# Nodes entered through their right child only had content before `pos`
			if i.right then continue
			var tmp = new Concat
			tmp.left = nod
			var r = i.node.right
			if r != null then tmp.right = r
			nod = tmp
		end

		var ret = new RopeString.from_root(nod)

		# Second pass: drop everything located after the last wanted character
		path = ret.node_at(len-1)

		offset = path.offset
		nod = new Leaf(path.leaf.str.substring(0, offset+1))

		for i in path.stack.reverse_iterator do
			# Nodes entered through their left child only had content after the end
			if i.left then continue
			var tmp = new Concat
			tmp.right = nod
			var l = i.node.left
			if l != null then tmp.left = l
			nod = tmp
		end

		ret.root = nod

		return ret
	end
end
289
# Bookkeeping cell used while iterating on the nodes of a Rope
private class IteratorElement

	# The node being visited
	var node: RopeNode
	# If the node has a left child, was it visited ?
	var left: Bool = false
	# If the node has a right child, was it visited ?
	var right: Bool = false
	# Was the current node visited ?
	var done: Bool = false

	# Wraps `e`; a Leaf has no child, so both sides start as visited
	init(e: RopeNode)
	do
		node = e
		var childless = e isa Leaf
		left = childless
		right = childless
	end
end
311
# Simple Postfix iterator on the nodes of a Rope
private class Postfix
	super IndexedIterator[RopeNode]

	# Target Rope to iterate on
	var target: Rope

	# Current position in Rope
	var pos: Int

	# Visited nodes
	var stack = new List[IteratorElement]

	# Starts the iteration on the leaf of `tgt` that contains position `pos`
	#
	# When `pos` is out of bounds the iterator is created exhausted.
	# Otherwise `pos` is moved back to the first character of that leaf.
	init from(tgt: Rope, pos: Int)
	do
		target = tgt
		self.pos = pos
		if pos < 0 or pos >= tgt.length then return
		var path = tgt.node_at(pos)
		self.pos = pos - path.offset
		for step in path.stack do
			# The left side of each ancestor is either skipped or being explored
			var ancestor = new IteratorElement(step.node)
			ancestor.left = true
			ancestor.right = step.right
			stack.push(ancestor)
		end
		var start = new IteratorElement(path.leaf)
		start.done = true
		stack.push(start)
	end

	redef fun item
	do
		assert is_ok
		return stack.last.node
	end

	redef fun is_ok do return not stack.is_empty

	redef fun index do return pos

	# Moves to the next node in postfix order
	#
	# The stack is emptied as soon as `pos` reaches the end of the target.
	redef fun next
	do
		loop
			if stack.is_empty then return
			if pos > target.length - 1 then
				stack.clear
				return
			end
			var top = stack.last
			var current = top.node
			if top.done then
				# Node already yielded: account for its characters and leave it
				if current isa Leaf then pos += current.length
				stack.pop
				continue
			end
			if not top.left then
				top.left = true
				if current isa Concat then
					var child = current.left
					if child != null then
						stack.push(new IteratorElement(child))
						continue
					end
				end
			end
			if not top.right then
				top.right = true
				if current isa Concat then
					var child = current.right
					if child != null then
						stack.push(new IteratorElement(child))
						continue
					end
				end
			end
			# Both sides are exhausted: the node itself is the next item
			top.done = true
			return
		end
	end
end
389