lib/standard/ropes: Adapted output to Ropes.
[nit.git] / lib / standard / ropes.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it if you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or as a part of
9 # another product.
10
11 # Nit implementation of the Ropes (see Ropes : An Alternative to Strings,
12 # SOFTWARE - PRACTICE AND EXPERIENCE, VOL. 25(12), 1315 - 1330 (DECEMBER 1995)
13 # Hans. J Boehm, Russ Atkinson and Michael Plass)
14 #
15 # A rope is a kind of string but instead of being flat, it relies on a binary tree structure to store data.
16 module ropes
17
18 intrude import string
19
20 # Used when searching for a particular node
21 # Returns the path to the node from the root of the rope
22 # Also, the node and the offset for seeked position in the rope
23 private class Path
24 # Leaf found
25 var leaf: Leaf
26 # Offset in leaf
27 var offset: Int
28 # Stack of the nodes traversed, and the path used
29 var stack: List[PathElement]
30 end
31
32 # An element for a Path, has the concat node and whether or not
33 # left or right child was visited.
34 private class PathElement
35 # Visited node
36 var node: Concat
37 # Was the left child visited ?
38 var left = false
39 # Was the right child visited ?
40 var right = false
41 end
42
43 # A node for a Rope
44 private abstract class RopeNode
45 # Length of the node
46 var length = 0
47 end
48
49 # Node that represents a concatenation between two nodes (of any RopeNode type)
50 private class Concat
51 super RopeNode
52
53 # Left child of the node
54 var _left: nullable RopeNode = null
55 # Right child of the node
56 var _right: nullable RopeNode = null
57
58 fun left: nullable RopeNode do return _left
59 fun right: nullable RopeNode do return _right
60
61 fun left=(l: RopeNode)
62 do
63 _left = l
64 length = l.length
65 if _right != null then length += _right.length
66 end
67
68 fun right=(r: RopeNode)
69 do
70 _right = r
71 length = r.length
72 if _left != null then length += _left.length
73 end
74 end
75
76 # Leaf of a Rope, contains a FlatString
77 private class Leaf
78 super RopeNode
79
80 # Encapsulated FlatString in the leaf node
81 var str: FlatString
82
83 init(val: FlatString) do
84 self.str = val
85 length = str.length
86 end
87
88 end
89
90 # Basic structure, binary tree with a root node.
91 #
92 # Also shared services by subsequent implementations.
93 abstract class Rope
94 super Text
95
96 # Root node, entry point of a Rope.
97 private var root: RopeNode
98
99 # Cached version of self as a flat String
100 private var str_representation: nullable NativeString = null
101
102 # Empty Rope
103 init do from("")
104
105 # Creates a new Rope with `s` as root
106 init from(s: String) do
107 if s isa RopeString then root = s.root else root = new Leaf(s.as(FlatString))
108 end
109
110 private init from_root(r: RopeNode)
111 do
112 root = r
113 end
114
115 redef fun length do return root.length
116
117 # Iterator on the nodes of the rope, in forward postfix order
118 private fun postfix(from: Int): Postfix do return new Postfix.from(self, from)
119
120 # Iterator on the leaves of the rope, forward order
121 private fun leaves(from: Int): LeavesIterator do return new LeavesIterator(self, from)
122
123 # Iterator on the substrings from 0, in forward order
124 fun substrings: IndexedIterator[Text] do return new SubstringsIterator(self, 0)
125
126 # Iterator on the substrings, starting at position `from`, in forward order
127 fun substrings_from(from: Int): IndexedIterator[Text] do return new SubstringsIterator(self, from)
128
129 redef fun output
130 do
131 for i in substrings do
132 i.output
133 end
134 end
135
136 redef fun to_cstring
137 do
138 if str_representation != null then return str_representation.as(not null)
139
140 var native_final_str = calloc_string(length + 1)
141
142 native_final_str[length] = '\0'
143
144 if self.length == 0 then
145 str_representation = native_final_str
146 return native_final_str
147 end
148
149 var offset = 0
150
151 for i in substrings do
152 var str = i.flatten
153 if str isa FlatString then str.items.copy_to(native_final_str, str.length, str.index_from, offset)
154 offset += i.length
155 end
156
157 str_representation = native_final_str
158
159 return native_final_str
160 end
161
162 # Path to the Leaf for `position`
163 private fun node_at(position: Int): Path
164 do
165 assert position >= 0 and position < length
166 return get_node_from(root.as(not null), 0, position, new List[PathElement])
167 end
168
169 # Builds the path to Leaf at position `seek_pos`
170 private fun get_node_from(node: RopeNode, curr_pos: Int, seek_pos: Int, stack: List[PathElement]): Path
171 do
172 assert curr_pos >= 0
173 if node isa Leaf then return new Path(node, seek_pos - curr_pos, stack)
174 node = node.as(Concat)
175
176 if node.left != null then
177 var next_pos = curr_pos + node.left.length
178 stack.add(new PathElement(node))
179 if next_pos > seek_pos then
180 stack.last.left = true
181 return get_node_from(node.left.as(not null), curr_pos, seek_pos, stack)
182 end
183 stack.last.right = true
184 return get_node_from(node.right.as(not null), next_pos, seek_pos, stack)
185 else
186 var vis = new PathElement(node)
187 vis.right = true
188 stack.add(vis)
189 return get_node_from(node.right.as(not null), curr_pos, seek_pos, stack)
190 end
191 end
192
193 redef fun ==(o)
194 do
195 if not o isa Text then return false
196 if o.length != self.length then return false
197 var oit = o.chars.iterator
198 for i in self.chars.iterator do
199 if i != oit.item then return false
200 oit.next
201 end
202 return true
203 end
204 end
205
206 # Rope that cannot be modified
207 class RopeString
208 super Rope
209 super String
210
211 redef fun to_s do return self
212
213 redef fun empty do return once new RopeString.from("")
214
215 redef fun reversed
216 do
217 var ret = empty.as(RopeString)
218 for i in substrings do
219 ret = ret.prepend(i.reversed.to_s).as(RopeString)
220 end
221 return ret
222 end
223
224 redef fun to_upper
225 do
226 var ret = empty
227 for i in substrings do
228 ret += i.to_upper
229 end
230 return ret
231 end
232
233 redef fun to_lower
234 do
235 var ret = empty
236 for i in substrings do
237 ret += i.to_lower
238 end
239 return ret
240 end
241
242 redef fun +(o) do return insert_at(o.to_s, length)
243
244 redef fun *(n)
245 do
246 var ret = new RopeString.from("")
247 for i in [0..n[ do
248 ret = (ret + self).as(RopeString)
249 end
250 return ret
251 end
252
253 # Inserts a String `str` at position `pos`
254 fun insert_at(str: String, pos: Int): RopeString
255 do
256 if str.length == 0 then return self
257 if self.length == 0 then return new RopeString.from(str)
258
259 assert pos >= 0 and pos <= length
260
261 if pos == length then return append(str).as(RopeString)
262
263 var path = node_at(pos)
264
265 var last_concat = new Concat
266
267 if path.offset == 0 then
268 last_concat.right = path.leaf
269 if str isa FlatString then last_concat.left = new Leaf(str) else last_concat.left = str.as(RopeString).root
270 else if path.offset == path.leaf.length then
271 if str isa FlatString then last_concat.right = new Leaf(str) else last_concat.right = str.as(RopeString).root
272 last_concat.left = path.leaf
273 else
274 var s = path.leaf.str
275 var l_half = s.substring(0, s.length - path.offset)
276 var r_half = s.substring_from(s.length - path.offset)
277 var cct = new Concat
278 cct.right = new Leaf(r_half)
279 last_concat.left = new Leaf(l_half)
280 if str isa FlatString then last_concat.right = new Leaf(str) else last_concat.right = str.as(RopeString).root
281 cct.left = last_concat
282 last_concat = cct
283 end
284
285 for i in path.stack.reverse_iterator do
286 var nod = new Concat
287 if i.left then
288 nod.right = i.node.right.as(not null)
289 nod.left = last_concat
290 else
291 nod.left = i.node.left.as(not null)
292 nod.right = last_concat
293 end
294 last_concat = nod
295 end
296
297 return new RopeString.from_root(last_concat)
298 end
299
300 # Adds `s` at the beginning of self
301 fun prepend(s: String): String do return insert_at(s, 0)
302
303 # Adds `s` at the end of self
304 fun append(s: String): String
305 do
306 if self.is_empty then return s
307 return new RopeString.from_root(append_to_path(root,s))
308 end
309
310 # Builds a new path from root to the rightmost node with s appended
311 private fun append_to_path(node: RopeNode, s: String): RopeNode
312 do
313 var cct = new Concat
314 if node isa Leaf then
315 cct.left = node
316 if s isa FlatString then cct.right = new Leaf(s) else cct.right = s.as(RopeString).root
317 else if node isa Concat then
318 var right = node.right
319 if node.left != null then cct.left = node.left.as(not null)
320 if right == null then
321 if s isa FlatString then cct.right = new Leaf(s) else cct.right = s.as(RopeString).root
322 else
323 cct.right = append_to_path(right, s)
324 end
325 end
326 return cct
327 end
328
329 # O(log(n))
330 #
331 # var rope = new RopeString.from("abcd")
332 # assert rope.substring(1, 2) == "bc"
333 # assert rope.substring(-1, 2) == "a"
334 # assert rope.substring(1, 0) == ""
335 # assert rope.substring(2, 5) == "cd"
336 #
337 redef fun substring(pos, len)
338 do
339 if pos < 0 then
340 len += pos
341 pos = 0
342 end
343
344 if pos + len > length then len = length - pos
345
346 if len <= 0 then return new RopeString.from("")
347
348 var path = node_at(pos)
349
350 var lf = path.leaf
351 var offset = path.offset
352
353 if path.leaf.str.length - offset > len then lf = new Leaf(lf.str.substring(offset,len)) else lf = new Leaf(lf.str.substring_from(offset))
354
355 var nod: RopeNode = lf
356
357 for i in path.stack.reverse_iterator do
358 if i.right then continue
359 var tmp = new Concat
360 tmp.left = nod
361 var r = i.node.right
362 if r != null then tmp.right = r
363 nod = tmp
364 end
365
366 var ret = new RopeString
367 ret.root = nod
368
369 path = ret.node_at(len-1)
370
371 offset = path.offset
372 nod = new Leaf(path.leaf.str.substring(0, offset+1))
373
374 for i in path.stack.reverse_iterator do
375 if i.left then continue
376 var tmp = new Concat
377 tmp.right = nod
378 var l = i.node.left
379 if l != null then tmp.left = l
380 nod = tmp
381 end
382
383 ret.root = nod
384
385 return ret
386 end
387 end
388
389 # Used to iterate on a Rope
390 private class IteratorElement
391
392 init(e: RopeNode)
393 do
394 if e isa Leaf then
395 left = true
396 right = true
397 end
398 node = e
399 end
400
401 # The node being visited
402 var node: RopeNode
403 # If the node has a left child, was it visited ?
404 var left = false
405 # If the node has a right child, was it visited ?
406 var right = false
407 # Was the current node visited ?
408 var done = false
409 end
410
411 # Simple Postfix iterator on the nodes of a Rope
412 private class Postfix
413 super IndexedIterator[RopeNode]
414
415 # Target Rope to iterate on
416 var target: Rope
417
418 # Current position in Rope
419 var pos: Int
420
421 # Visited nodes
422 var stack = new List[IteratorElement]
423
424 init from(tgt: Rope, pos: Int)
425 do
426 self.target = tgt
427 self.pos = pos
428 if pos < 0 or pos >= tgt.length then return
429 var path = tgt.node_at(pos)
430 self.pos -= path.offset
431 for i in path.stack do
432 var item = new IteratorElement(i.node)
433 item.left = true
434 if i.right then item.right = true
435 stack.push item
436 end
437 var item = new IteratorElement(path.leaf)
438 item.done = true
439 stack.push item
440 end
441
442 redef fun item
443 do
444 assert is_ok
445 return stack.last.node
446 end
447
448 redef fun is_ok do return not stack.is_empty
449
450 redef fun index do return pos
451
452 redef fun next do
453 if stack.is_empty then return
454 if pos > target.length-1 then
455 stack.clear
456 return
457 end
458 var lst = stack.last
459 if lst.done then
460 if lst.node isa Leaf then
461 pos += lst.node.length
462 end
463 stack.pop
464 next
465 return
466 end
467 if not lst.left then
468 lst.left = true
469 var nod = lst.node
470 if nod isa Concat and nod.left != null then
471 stack.push(new IteratorElement(nod.left.as(not null)))
472 next
473 return
474 end
475 end
476 if not lst.right then
477 lst.right = true
478 var nod = lst.node
479 if nod isa Concat and nod.right != null then
480 stack.push(new IteratorElement(nod.right.as(not null)))
481 next
482 return
483 end
484 end
485 lst.done = true
486 end
487 end
488
489 # Iterates on the leaves (substrings) of the Rope
490 class LeavesIterator
491 super IndexedIterator[Leaf]
492
493 private var nodes: Postfix
494
495 init(tgt: Rope, pos: Int)
496 do
497 nodes = tgt.postfix(pos)
498 end
499
500 redef fun is_ok do return nodes.is_ok
501
502 redef fun item
503 do
504 assert is_ok
505 return nodes.item.as(Leaf)
506 end
507
508 redef fun index do return nodes.index
509
510 redef fun next
511 do
512 while nodes.is_ok do
513 nodes.next
514 if nodes.is_ok and nodes.item isa Leaf then break
515 end
516 end
517 end
518
519 # Uses the leaves and calculates a new substring on each iteration
520 class SubstringsIterator
521 super IndexedIterator[Text]
522
523 private var nodes: IndexedIterator[Leaf]
524
525 # Current position in Rope
526 var pos: Int
527
528 # Current substring, computed from the current Leaf and indexes
529 var substring: Text
530
531 init(tgt: Rope, pos: Int)
532 do
533 nodes = tgt.leaves(pos)
534 self.pos = pos
535 if pos < 0 or pos >= tgt.length then return
536 make_substring
537 end
538
539 # Compute the bounds of the current substring and makes the substring
540 private fun make_substring
541 do
542 substring = nodes.item.str
543 var min = 0
544 var length = substring.length
545 if nodes.index < pos then
546 min = pos - nodes.index
547 end
548 substring = substring.substring(min, length)
549 end
550
551 redef fun is_ok do return nodes.is_ok
552
553 redef fun item
554 do
555 assert is_ok
556 return substring
557 end
558
559 redef fun index do return pos
560
561 redef fun next
562 do
563 pos += substring.length
564 nodes.next
565 if nodes.is_ok then make_substring
566 end
567
568 end
569
570 class RopeCharIterator
571 super IndexedIterator[Char]
572
573 var substrings: IndexedIterator[Text]
574
575 var pos: Int
576
577 var max: Int
578
579 var substr_iter: IndexedIterator[Char]
580
581 init(tgt: Rope, from: Int)
582 do
583 substrings = tgt.substrings_from(from)
584 max = tgt.length - 1
585 if not substrings.is_ok then
586 pos = tgt.length
587 return
588 end
589 pos = from
590 substr_iter = substrings.item.chars.iterator
591 end
592
593 redef fun item do return substr_iter.item
594
595 redef fun is_ok do return pos <= max
596
597 redef fun index do return pos
598
599 redef fun next
600 do
601 pos += 1
602 if substr_iter.is_ok then
603 substr_iter.next
604 end
605 if not substr_iter.is_ok then
606 substrings.next
607 if substrings.is_ok then
608 substr_iter = substrings.item.chars.iterator
609 end
610 end
611 end
612 end
613