f69b5476f7c91a07e3fbab86c923a5350755060a
[nit.git] / lib / standard / ropes.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it if you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or as a part of
9 # another product.
10
11 # Nit implementation of the Ropes (see Ropes : An Alternative to Strings,
12 # SOFTWARE - PRACTICE AND EXPERIENCE, VOL. 25(12), 1315 - 1330 (DECEMBER 1995)
13 # Hans. J Boehm, Russ Atkinson and Michael Plass)
14 #
15 # A rope is a kind of string but instead of being flat, it relies on a binary tree structure to store data.
16 module ropes
17
18 intrude import string
19
20 # Used when searching for a particular node
21 # Returns the path to the node from the root of the rope
22 # Also, the node and the offset for seeked position in the rope
23 private class Path
24 # Leaf found
25 var leaf: Leaf
26 # Offset in leaf
27 var offset: Int
28 # Stack of the nodes traversed, and the path used
29 var stack: List[PathElement]
30 end
31
32 # An element for a Path, has the concat node and whether or not
33 # left or right child was visited.
34 private class PathElement
35 # Visited node
36 var node: Concat
37 # Was the left child visited ?
38 var left = false
39 # Was the right child visited ?
40 var right = false
41 end
42
43 # A node for a Rope
44 private abstract class RopeNode
45 # Length of the node
46 var length = 0
47 end
48
49 # Node that represents a concatenation between two nodes (of any RopeNode type)
50 private class Concat
51 super RopeNode
52
53 # Left child of the node
54 var _left: nullable RopeNode = null
55 # Right child of the node
56 var _right: nullable RopeNode = null
57
58 fun left: nullable RopeNode do return _left
59 fun right: nullable RopeNode do return _right
60
61 fun left=(l: RopeNode)
62 do
63 _left = l
64 length = l.length
65 if _right != null then length += _right.length
66 end
67
68 fun right=(r: RopeNode)
69 do
70 _right = r
71 length = r.length
72 if _left != null then length += _left.length
73 end
74 end
75
76 # Leaf of a Rope, contains a FlatString
77 private class Leaf
78 super RopeNode
79
80 # Encapsulated FlatString in the leaf node
81 var str: FlatString
82
83 init(val: FlatString) do
84 self.str = val
85 length = str.length
86 end
87
88 end
89
90 # Basic structure, binary tree with a root node.
91 #
92 # Also shared services by subsequent implementations.
93 abstract class Rope
94 super Text
95
96 # Root node, entry point of a Rope.
97 private var root: RopeNode
98
99 # Empty Rope
100 init do from("")
101
102 # Creates a new Rope with `s` as root
103 init from(s: String) do
104 if s isa RopeString then root = s.root else root = new Leaf(s.as(FlatString))
105 end
106
107 private init from_root(r: RopeNode)
108 do
109 root = r
110 end
111
112 redef fun length do return root.length
113
114 # Iterator on the nodes of the rope, in forward postfix order
115 private fun postfix(from: Int): Postfix do return new Postfix.from(self, from)
116
117 # Iterator on the leaves of the rope, forward order
118 private fun leaves(from: Int): LeavesIterator do return new LeavesIterator(self, from)
119
120 # Iterator on the substrings from 0, in forward order
121 fun substrings: IndexedIterator[Text] do return new SubstringsIterator(self, 0)
122
123 # Iterator on the substrings, starting at position `from`, in forward order
124 fun substrings_from(from: Int): IndexedIterator[Text] do return new SubstringsIterator(self, from)
125
126 # Path to the Leaf for `position`
127 private fun node_at(position: Int): Path
128 do
129 assert position >= 0 and position < length
130 return get_node_from(root.as(not null), 0, position, new List[PathElement])
131 end
132
133 # Builds the path to Leaf at position `seek_pos`
134 private fun get_node_from(node: RopeNode, curr_pos: Int, seek_pos: Int, stack: List[PathElement]): Path
135 do
136 assert curr_pos >= 0
137 if node isa Leaf then return new Path(node, seek_pos - curr_pos, stack)
138 node = node.as(Concat)
139
140 if node.left != null then
141 var next_pos = curr_pos + node.left.length
142 stack.add(new PathElement(node))
143 if next_pos > seek_pos then
144 stack.last.left = true
145 return get_node_from(node.left.as(not null), curr_pos, seek_pos, stack)
146 end
147 stack.last.right = true
148 return get_node_from(node.right.as(not null), next_pos, seek_pos, stack)
149 else
150 var vis = new PathElement(node)
151 vis.right = true
152 stack.add(vis)
153 return get_node_from(node.right.as(not null), curr_pos, seek_pos, stack)
154 end
155 end
156
157 redef fun ==(o)
158 do
159 if not o isa Text then return false
160 if o.length != self.length then return false
161 var oit = o.chars.iterator
162 for i in self.chars.iterator do
163 if i != oit.item then return false
164 oit.next
165 end
166 return true
167 end
168 end
169
170 # Rope that cannot be modified
171 class RopeString
172 super Rope
173 super String
174
175 redef fun to_s do return self
176
177 redef fun empty do return once new RopeString.from("")
178
179 redef fun reversed
180 do
181 var ret = empty.as(RopeString)
182 for i in substrings do
183 ret = ret.prepend(i.reversed.to_s).as(RopeString)
184 end
185 return ret
186 end
187
188 redef fun to_upper
189 do
190 var ret = empty
191 for i in substrings do
192 ret += i.to_upper
193 end
194 return ret
195 end
196
197 redef fun to_lower
198 do
199 var ret = empty
200 for i in substrings do
201 ret += i.to_lower
202 end
203 return ret
204 end
205
206 redef fun +(o) do return insert_at(o.to_s, length)
207
208 redef fun *(n)
209 do
210 var ret = new RopeString.from("")
211 for i in [0..n[ do
212 ret = (ret + self).as(RopeString)
213 end
214 return ret
215 end
216
217 # Inserts a String `str` at position `pos`
218 fun insert_at(str: String, pos: Int): RopeString
219 do
220 if str.length == 0 then return self
221 if self.length == 0 then return new RopeString.from(str)
222
223 assert pos >= 0 and pos <= length
224
225 if pos == length then return append(str).as(RopeString)
226
227 var path = node_at(pos)
228
229 var last_concat = new Concat
230
231 if path.offset == 0 then
232 last_concat.right = path.leaf
233 if str isa FlatString then last_concat.left = new Leaf(str) else last_concat.left = str.as(RopeString).root
234 else if path.offset == path.leaf.length then
235 if str isa FlatString then last_concat.right = new Leaf(str) else last_concat.right = str.as(RopeString).root
236 last_concat.left = path.leaf
237 else
238 var s = path.leaf.str
239 var l_half = s.substring(0, s.length - path.offset)
240 var r_half = s.substring_from(s.length - path.offset)
241 var cct = new Concat
242 cct.right = new Leaf(r_half)
243 last_concat.left = new Leaf(l_half)
244 if str isa FlatString then last_concat.right = new Leaf(str) else last_concat.right = str.as(RopeString).root
245 cct.left = last_concat
246 last_concat = cct
247 end
248
249 for i in path.stack.reverse_iterator do
250 var nod = new Concat
251 if i.left then
252 nod.right = i.node.right.as(not null)
253 nod.left = last_concat
254 else
255 nod.left = i.node.left.as(not null)
256 nod.right = last_concat
257 end
258 last_concat = nod
259 end
260
261 return new RopeString.from_root(last_concat)
262 end
263
264 # Adds `s` at the beginning of self
265 fun prepend(s: String): String do return insert_at(s, 0)
266
267 # Adds `s` at the end of self
268 fun append(s: String): String
269 do
270 if self.is_empty then return s
271 return new RopeString.from_root(append_to_path(root,s))
272 end
273
274 # Builds a new path from root to the rightmost node with s appended
275 private fun append_to_path(node: RopeNode, s: String): RopeNode
276 do
277 var cct = new Concat
278 if node isa Leaf then
279 cct.left = node
280 if s isa FlatString then cct.right = new Leaf(s) else cct.right = s.as(RopeString).root
281 else if node isa Concat then
282 var right = node.right
283 if node.left != null then cct.left = node.left.as(not null)
284 if right == null then
285 if s isa FlatString then cct.right = new Leaf(s) else cct.right = s.as(RopeString).root
286 else
287 cct.right = append_to_path(right, s)
288 end
289 end
290 return cct
291 end
292
293 # O(log(n))
294 #
295 # var rope = new RopeString.from("abcd")
296 # assert rope.substring(1, 2) == "bc"
297 # assert rope.substring(-1, 2) == "a"
298 # assert rope.substring(1, 0) == ""
299 # assert rope.substring(2, 5) == "cd"
300 #
301 redef fun substring(pos, len)
302 do
303 if pos < 0 then
304 len += pos
305 pos = 0
306 end
307
308 if pos + len > length then len = length - pos
309
310 if len <= 0 then return new RopeString.from("")
311
312 var path = node_at(pos)
313
314 var lf = path.leaf
315 var offset = path.offset
316
317 if path.leaf.str.length - offset > len then lf = new Leaf(lf.str.substring(offset,len)) else lf = new Leaf(lf.str.substring_from(offset))
318
319 var nod: RopeNode = lf
320
321 for i in path.stack.reverse_iterator do
322 if i.right then continue
323 var tmp = new Concat
324 tmp.left = nod
325 var r = i.node.right
326 if r != null then tmp.right = r
327 nod = tmp
328 end
329
330 var ret = new RopeString
331 ret.root = nod
332
333 path = ret.node_at(len-1)
334
335 offset = path.offset
336 nod = new Leaf(path.leaf.str.substring(0, offset+1))
337
338 for i in path.stack.reverse_iterator do
339 if i.left then continue
340 var tmp = new Concat
341 tmp.right = nod
342 var l = i.node.left
343 if l != null then tmp.left = l
344 nod = tmp
345 end
346
347 ret.root = nod
348
349 return ret
350 end
351 end
352
353 # Used to iterate on a Rope
354 private class IteratorElement
355
356 init(e: RopeNode)
357 do
358 if e isa Leaf then
359 left = true
360 right = true
361 end
362 node = e
363 end
364
365 # The node being visited
366 var node: RopeNode
367 # If the node has a left child, was it visited ?
368 var left = false
369 # If the node has a right child, was it visited ?
370 var right = false
371 # Was the current node visited ?
372 var done = false
373 end
374
375 # Simple Postfix iterator on the nodes of a Rope
376 private class Postfix
377 super IndexedIterator[RopeNode]
378
379 # Target Rope to iterate on
380 var target: Rope
381
382 # Current position in Rope
383 var pos: Int
384
385 # Visited nodes
386 var stack = new List[IteratorElement]
387
388 init from(tgt: Rope, pos: Int)
389 do
390 self.target = tgt
391 self.pos = pos
392 if pos < 0 or pos >= tgt.length then return
393 var path = tgt.node_at(pos)
394 self.pos -= path.offset
395 for i in path.stack do
396 var item = new IteratorElement(i.node)
397 item.left = true
398 if i.right then item.right = true
399 stack.push item
400 end
401 var item = new IteratorElement(path.leaf)
402 item.done = true
403 stack.push item
404 end
405
406 redef fun item
407 do
408 assert is_ok
409 return stack.last.node
410 end
411
412 redef fun is_ok do return not stack.is_empty
413
414 redef fun index do return pos
415
416 redef fun next do
417 if stack.is_empty then return
418 if pos > target.length-1 then
419 stack.clear
420 return
421 end
422 var lst = stack.last
423 if lst.done then
424 if lst.node isa Leaf then
425 pos += lst.node.length
426 end
427 stack.pop
428 next
429 return
430 end
431 if not lst.left then
432 lst.left = true
433 var nod = lst.node
434 if nod isa Concat and nod.left != null then
435 stack.push(new IteratorElement(nod.left.as(not null)))
436 next
437 return
438 end
439 end
440 if not lst.right then
441 lst.right = true
442 var nod = lst.node
443 if nod isa Concat and nod.right != null then
444 stack.push(new IteratorElement(nod.right.as(not null)))
445 next
446 return
447 end
448 end
449 lst.done = true
450 end
451 end
452
453 # Iterates on the leaves (substrings) of the Rope
454 class LeavesIterator
455 super IndexedIterator[Leaf]
456
457 private var nodes: Postfix
458
459 init(tgt: Rope, pos: Int)
460 do
461 nodes = tgt.postfix(pos)
462 end
463
464 redef fun is_ok do return nodes.is_ok
465
466 redef fun item
467 do
468 assert is_ok
469 return nodes.item.as(Leaf)
470 end
471
472 redef fun index do return nodes.index
473
474 redef fun next
475 do
476 while nodes.is_ok do
477 nodes.next
478 if nodes.is_ok and nodes.item isa Leaf then break
479 end
480 end
481 end
482
483 # Uses the leaves and calculates a new substring on each iteration
484 class SubstringsIterator
485 super IndexedIterator[Text]
486
487 private var nodes: IndexedIterator[Leaf]
488
489 # Current position in Rope
490 var pos: Int
491
492 # Current substring, computed from the current Leaf and indexes
493 var substring: Text
494
495 init(tgt: Rope, pos: Int)
496 do
497 nodes = tgt.leaves(pos)
498 self.pos = pos
499 if pos < 0 or pos >= tgt.length then return
500 make_substring
501 end
502
503 # Compute the bounds of the current substring and makes the substring
504 private fun make_substring
505 do
506 substring = nodes.item.str
507 var min = 0
508 var length = substring.length
509 if nodes.index < pos then
510 min = pos - nodes.index
511 end
512 substring = substring.substring(min, length)
513 end
514
515 redef fun is_ok do return nodes.is_ok
516
517 redef fun item
518 do
519 assert is_ok
520 return substring
521 end
522
523 redef fun index do return pos
524
525 redef fun next
526 do
527 pos += substring.length
528 nodes.next
529 if nodes.is_ok then make_substring
530 end
531
532 end
533
534 class RopeCharIterator
535 super IndexedIterator[Char]
536
537 var substrings: IndexedIterator[Text]
538
539 var pos: Int
540
541 var max: Int
542
543 var substr_iter: IndexedIterator[Char]
544
545 init(tgt: Rope, from: Int)
546 do
547 substrings = tgt.substrings_from(from)
548 max = tgt.length - 1
549 if not substrings.is_ok then
550 pos = tgt.length
551 return
552 end
553 pos = from
554 substr_iter = substrings.item.chars.iterator
555 end
556
557 redef fun item do return substr_iter.item
558
559 redef fun is_ok do return pos <= max
560
561 redef fun index do return pos
562
563 redef fun next
564 do
565 pos += 1
566 if substr_iter.is_ok then
567 substr_iter.next
568 end
569 if not substr_iter.is_ok then
570 substrings.next
571 if substrings.is_ok then
572 substr_iter = substrings.item.chars.iterator
573 end
574 end
575 end
576 end
577