lib/standard/ropes: Added prepend service, alias to insert_at(0).
[nit.git] / lib / standard / ropes.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it if you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or as a part of
9 # another product.
10
11 # Nit implementation of Ropes (see "Ropes: an Alternative to Strings",
12 # SOFTWARE - PRACTICE AND EXPERIENCE, VOL. 25(12), 1315 - 1330 (DECEMBER 1995),
13 # by Hans-J. Boehm, Russ Atkinson and Michael Plass).
14 #
15 # A rope is a kind of string but instead of being flat, it relies on a binary tree structure to store data.
16 module ropes
17
18 intrude import string
19
# Result of a lookup for a position inside a rope.
#
# Bundles the leaf that was reached, the offset of the sought position
# inside that leaf, and the chain of `Concat` nodes walked from the root.
private class Path
	# Leaf reached by the lookup
	var leaf: Leaf
	# Offset of the sought position, relative to the start of `leaf`
	var offset: Int
	# Nodes traversed from the root, each with the direction that was taken
	var stack: List[PathElement]
end
31
# One step of a `Path`: a `Concat` node together with the direction
# (left or right child) followed when going through it.
private class PathElement
	# Node that was traversed
	var node: Concat
	# True if the walk continued into the left child
	var left = false
	# True if the walk continued into the right child
	var right = false
end
42
# Common ancestor of every node stored in a Rope
private abstract class RopeNode
	# Length of the node (0 until a subclass sets it)
	var length = 0
end
48
# Inner node of a Rope: the concatenation of two nodes (of any RopeNode type)
#
# Either child may be missing (null). `length` is recomputed each time a
# child is assigned through `left=` or `right=`.
private class Concat
	super RopeNode

	# Left child, null while unset
	var _left: nullable RopeNode = null
	# Right child, null while unset
	var _right: nullable RopeNode = null

	# Left child of the node, if any
	fun left: nullable RopeNode do return _left

	# Right child of the node, if any
	fun right: nullable RopeNode do return _right

	# Sets the left child and refreshes `length` from both children
	fun left=(l: RopeNode)
	do
		_left = l
		var total = l.length
		var sibling = _right
		if sibling != null then total += sibling.length
		length = total
	end

	# Sets the right child and refreshes `length` from both children
	fun right=(r: RopeNode)
	do
		_right = r
		var total = r.length
		var sibling = _left
		if sibling != null then total += sibling.length
		length = total
	end
end
75
# Terminal node of a Rope, wrapping a FlatString
private class Leaf
	super RopeNode

	# FlatString carried by this leaf
	var str: FlatString

	# Builds a leaf around `val`; the node length is the length of `val`
	init(val: FlatString)
	do
		str = val
		length = val.length
	end
end
89
# Base structure of a rope: a binary tree reachable from a root node.
#
# Also hosts the services shared by the concrete implementations.
abstract class Rope
	super Text

	# Root node, entry point of the tree
	private var root: RopeNode

	# Builds an empty Rope
	init do from("")

	# Builds a Rope from `s`
	#
	# The tree of `s` is reused when `s` is a RopeString, otherwise `s`
	# (cast to FlatString) is wrapped in a single Leaf.
	init from(s: String)
	do
		if s isa RopeString then
			root = s.root
		else
			root = new Leaf(s.as(FlatString))
		end
	end

	# Builds a Rope directly on top of the node `r`
	private init from_root(r: RopeNode)
	do
		root = r
	end

	redef fun length do return root.length

	# Iterator on the nodes of the rope, in forward postfix order
	private fun postfix(from: Int): Postfix do return new Postfix.from(self, from)

	# Iterator on the leaves of the rope, in forward order
	private fun leaves(from: Int): LeavesIterator do return new LeavesIterator(self, from)

	# Iterator on the substrings, starting at position 0, in forward order
	fun substrings: IndexedIterator[Text] do return new SubstringsIterator(self, 0)

	# Iterator on the substrings, starting at position `from`, in forward order
	fun substrings_from(from: Int): IndexedIterator[Text] do return new SubstringsIterator(self, from)

	# Path from the root to the Leaf holding `position`
	#
	# `position` must be within `[0, length[`.
	private fun node_at(position: Int): Path
	do
		assert position >= 0 and position < length
		var visited = new List[PathElement]
		return get_node_from(root, 0, position, visited)
	end

	# Recursively walks down from `node` to the Leaf holding `seek_pos`
	#
	# `curr_pos` is the position in the rope of the first character under
	# `node`. Every Concat gone through is recorded in `stack` with the
	# direction taken.
	private fun get_node_from(node: RopeNode, curr_pos: Int, seek_pos: Int, stack: List[PathElement]): Path
	do
		assert curr_pos >= 0
		if node isa Leaf then return new Path(node, seek_pos - curr_pos, stack)

		var cct = node.as(Concat)
		var step = new PathElement(cct)
		stack.add(step)

		var l = cct.left
		if l == null then
			# No left child: the only way down is to the right
			step.right = true
			return get_node_from(cct.right.as(not null), curr_pos, seek_pos, stack)
		end

		# Position of the first character stored after the left child
		var next_pos = curr_pos + l.length
		if next_pos > seek_pos then
			step.left = true
			return get_node_from(l, curr_pos, seek_pos, stack)
		end
		step.right = true
		return get_node_from(cct.right.as(not null), next_pos, seek_pos, stack)
	end

end
158
# Rope that cannot be modified
#
# Every service returns a new rope (or `self`) and leaves the nodes of the
# receiver untouched; only the nodes on the modified path are rebuilt.
class RopeString
	super Rope
	super String

	redef fun to_s do return self

	# Node to graft in a tree to represent `s`
	#
	# A FlatString is wrapped in a new Leaf; otherwise `s` is cast to
	# RopeString and its root is reused.
	private fun node_for(s: String): RopeNode
	do
		if s isa FlatString then return new Leaf(s)
		return s.as(RopeString).root
	end

	# Inserts a String `str` at position `pos`
	#
	# `pos` must be within `[0, length]`. Returns `self` when `str` is
	# empty.
	#
	#     var rope = new RopeString.from("abcdef")
	#     assert rope.insert_at("XY", 2) == "abXYcdef"
	#     assert rope.insert_at("XY", 0) == "XYabcdef"
	#     assert rope.insert_at("XY", 6) == "abcdefXY"
	#
	fun insert_at(str: String, pos: Int): RopeString
	do
		if str.length == 0 then return self
		if self.length == 0 then return new RopeString.from(str)

		assert pos >= 0 and pos <= length

		if pos == length then return append(str).as(RopeString)

		var path = node_at(pos)
		var inserted = node_for(str)

		var last_concat = new Concat

		if path.offset == 0 then
			# Insertion right before the leaf
			last_concat.left = inserted
			last_concat.right = path.leaf
		else if path.offset == path.leaf.length then
			# Insertion right after the leaf
			last_concat.left = path.leaf
			last_concat.right = inserted
		else
			# Insertion inside the leaf: split it at `offset`, which is
			# counted from the start of the leaf.
			var s = path.leaf.str
			var l_half = s.substring(0, path.offset)
			var r_half = s.substring_from(path.offset)
			var cct = new Concat
			cct.right = new Leaf(r_half)
			last_concat.left = new Leaf(l_half)
			last_concat.right = inserted
			cct.left = last_concat
			last_concat = cct
		end

		# Rebuild the path up to the root, sharing the untouched siblings.
		# A Concat may have a single child, so siblings are only attached
		# when they exist.
		for i in path.stack.reverse_iterator do
			var nod = new Concat
			if i.left then
				nod.left = last_concat
				var sibling = i.node.right
				if sibling != null then nod.right = sibling
			else
				var sibling = i.node.left
				if sibling != null then nod.left = sibling
				nod.right = last_concat
			end
			last_concat = nod
		end

		return new RopeString.from_root(last_concat)
	end

	# Adds `s` at the beginning of self
	fun prepend(s: String): String do return insert_at(s, 0)

	# Adds `s` at the end of self
	#
	# Returns `s` itself when self is empty, and `self` when `s` is empty.
	fun append(s: String): String
	do
		if self.is_empty then return s
		if s.is_empty then return self
		return new RopeString.from_root(append_to_path(root, s))
	end

	# Builds a new path from `node` to the rightmost node with `s` appended
	private fun append_to_path(node: RopeNode, s: String): RopeNode
	do
		var cct = new Concat
		if node isa Leaf then
			cct.left = node
			cct.right = node_for(s)
		else if node isa Concat then
			var l = node.left
			if l != null then cct.left = l
			var r = node.right
			if r == null then
				# Free slot on the right: graft `s` here
				cct.right = node_for(s)
			else
				cct.right = append_to_path(r, s)
			end
		end
		return cct
	end

	# O(log(n))
	#
	#     var rope = new RopeString.from("abcd")
	#     assert rope.substring(1, 2) == "bc"
	#     assert rope.substring(-1, 2) == "a"
	#     assert rope.substring(1, 0) == ""
	#     assert rope.substring(2, 5) == "cd"
	#
	redef fun substring(pos, len)
	do
		# A negative start shortens the requested length
		if pos < 0 then
			len += pos
			pos = 0
		end

		if pos + len > length then len = length - pos

		if len <= 0 then return new RopeString.from("")

		# First pass: drop everything located before `pos`
		var path = node_at(pos)

		var lf = path.leaf
		var offset = path.offset

		if path.leaf.str.length - offset > len then
			lf = new Leaf(lf.str.substring(offset, len))
		else
			lf = new Leaf(lf.str.substring_from(offset))
		end

		var nod: RopeNode = lf

		for i in path.stack.reverse_iterator do
			# Going right means the left sibling is before `pos`: skip it
			if i.right then continue
			var tmp = new Concat
			tmp.left = nod
			var r = i.node.right
			if r != null then tmp.right = r
			nod = tmp
		end

		var ret = new RopeString
		ret.root = nod

		# Second pass, on the intermediate rope: drop everything located
		# after the last wanted character
		path = ret.node_at(len - 1)

		offset = path.offset
		nod = new Leaf(path.leaf.str.substring(0, offset + 1))

		for i in path.stack.reverse_iterator do
			# Going left means the right sibling is past the end: skip it
			if i.left then continue
			var tmp = new Concat
			tmp.right = nod
			var l = i.node.left
			if l != null then tmp.left = l
			nod = tmp
		end

		ret.root = nod

		return ret
	end
end
301
# Bookkeeping record used while iterating on the nodes of a Rope
private class IteratorElement

	# The node being visited
	var node: RopeNode
	# If the node has a left child, was it visited ?
	var left = false
	# If the node has a right child, was it visited ?
	var right = false
	# Was the current node visited ?
	var done = false

	# Wraps `e`; a Leaf has no child, so both sides start as visited
	init(e: RopeNode)
	do
		node = e
		if e isa Leaf then
			left = true
			right = true
		end
	end
end
323
# Postfix iterator on the nodes of a Rope
private class Postfix
	super IndexedIterator[RopeNode]

	# Rope being iterated on
	var target: Rope

	# Current position in `target`
	var pos: Int

	# Nodes on the way from the root to the current node
	var stack = new List[IteratorElement]

	# Starts the iteration on the Leaf of `tgt` holding position `pos`
	#
	# The stack stays empty (iterator not ok) when `pos` is out of bounds.
	init from(tgt: Rope, pos: Int)
	do
		self.target = tgt
		self.pos = pos
		if pos < 0 or pos >= tgt.length then return
		var path = tgt.node_at(pos)
		# Align the index on the first character of the leaf
		self.pos = pos - path.offset
		for step in path.stack do
			var elt = new IteratorElement(step.node)
			# The left side is marked visited whichever direction was taken
			elt.left = true
			if step.right then elt.right = true
			stack.push elt
		end
		var start = new IteratorElement(path.leaf)
		start.done = true
		stack.push start
	end

	redef fun item
	do
		assert is_ok
		return stack.last.node
	end

	redef fun is_ok do return not stack.is_empty

	redef fun index do return pos

	redef fun next
	do
		loop
			if stack.is_empty then return
			if pos > target.length - 1 then
				# Past the last character: nothing left to visit
				stack.clear
				return
			end
			var top = stack.last
			var current = top.node
			if top.done then
				# Leaving a node; a Leaf moves the index past its content
				if current isa Leaf then pos += current.length
				stack.pop
				continue
			end
			if not top.left then
				top.left = true
				if current isa Concat then
					var child = current.left
					if child != null then
						stack.push(new IteratorElement(child))
						continue
					end
				end
			end
			if not top.right then
				top.right = true
				if current isa Concat then
					var child = current.right
					if child != null then
						stack.push(new IteratorElement(child))
						continue
					end
				end
			end
			# Both sides are handled: the node itself is the next item
			top.done = true
			return
		end
	end
end
401
# Iterator on the leaves (substrings) of a Rope
#
# Built on top of a `Postfix` iterator, skipping the nodes that are not
# leaves.
class LeavesIterator
	super IndexedIterator[Leaf]

	# Underlying iterator on all the nodes
	private var nodes: Postfix

	# Starts on the leaf of `tgt` holding position `pos`
	init(tgt: Rope, pos: Int)
	do
		nodes = tgt.postfix(pos)
	end

	redef fun is_ok do return nodes.is_ok

	redef fun item
	do
		assert is_ok
		return nodes.item.as(Leaf)
	end

	redef fun index do return nodes.index

	redef fun next
	do
		# Advance until the next Leaf, or until the nodes are exhausted
		loop
			if not nodes.is_ok then return
			nodes.next
			if nodes.is_ok and nodes.item isa Leaf then return
		end
	end
end
431
# Iterator on the substrings of a Rope
#
# Walks the leaves and computes a new substring on each step.
class SubstringsIterator
	super IndexedIterator[Text]

	# Underlying iterator on the leaves
	private var nodes: IndexedIterator[Leaf]

	# Current position in Rope
	var pos: Int

	# Current substring, computed from the current Leaf and indexes
	var substring: Text

	# Starts at position `pos` of `tgt`
	#
	# No substring is computed when `pos` is out of bounds.
	init(tgt: Rope, pos: Int)
	do
		nodes = tgt.leaves(pos)
		self.pos = pos
		if pos < 0 or pos >= tgt.length then return
		make_substring
	end

	# Computes `substring` from the current leaf
	#
	# When the leaf starts before `pos`, its first characters are skipped.
	private fun make_substring
	do
		var content = nodes.item.str
		var skip = 0
		if nodes.index < pos then skip = pos - nodes.index
		substring = content.substring(skip, content.length)
	end

	redef fun is_ok do return nodes.is_ok

	redef fun item
	do
		assert is_ok
		return substring
	end

	redef fun index do return pos

	redef fun next
	do
		pos += substring.length
		nodes.next
		if not nodes.is_ok then return
		make_substring
	end

end
482
# Iterator on the characters of a Rope
#
# Chains the character iterators of the successive substrings.
class RopeCharIterator
	super IndexedIterator[Char]

	# Iterator on the substrings of the target rope
	var substrings: IndexedIterator[Text]

	# Index of the current character in the rope
	var pos: Int

	# Index of the last character of the rope
	var max: Int

	# Iterator on the characters of the current substring
	var substr_iter: IndexedIterator[Char]

	# Starts on the character at position `from` of `tgt`
	#
	# When there is no substring to read, `pos` is set to the length of
	# `tgt` and `substr_iter` is left unset.
	init(tgt: Rope, from: Int)
	do
		substrings = tgt.substrings_from(from)
		max = tgt.length - 1
		if substrings.is_ok then
			pos = from
			substr_iter = substrings.item.chars.iterator
		else
			pos = tgt.length
		end
	end

	redef fun item do return substr_iter.item

	redef fun is_ok do return pos <= max

	redef fun index do return pos

	redef fun next
	do
		pos += 1
		if substr_iter.is_ok then substr_iter.next
		if substr_iter.is_ok then return
		# Current substring exhausted: switch to the next one, if any
		substrings.next
		if substrings.is_ok then substr_iter = substrings.item.chars.iterator
	end
end
526