lib: intro percent encoding/decoding methods
[nit.git] / lib / standard / ropes.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it if you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or as a part of
9 # another product.
10
11 # Nit implementation of the Ropes (see Ropes : An Alternative to Strings,
12 # SOFTWARE - PRACTICE AND EXPERIENCE, VOL. 25(12), 1315 - 1330 (DECEMBER 1995)
13 # Hans. J Boehm, Russ Atkinson and Michael Plass)
14 #
15 # A rope is a kind of string but instead of being flat, it relies on a binary tree structure to store data.
16 module ropes
17
18 intrude import string
19
20 # Used when searching for a particular node
21 # Returns the path to the node from the root of the rope
22 # Also, the node and the offset for seeked position in the rope
23 private class Path
24 # Leaf found
25 var leaf: Leaf
26 # Offset in leaf
27 var offset: Int
28 # Stack of the nodes traversed, and the path used
29 var stack: List[PathElement]
30 end
31
32 # An element for a Path, has the concat node and whether or not
33 # left or right child was visited.
34 private class PathElement
35 # Visited node
36 var node: Concat
37 # Was the left child visited ?
38 var left = false
39 # Was the right child visited ?
40 var right = false
41 end
42
43 # A node for a Rope
44 private abstract class RopeNode
45 # Length of the node
46 var length = 0
47
48 # Transforms the current node to a Leaf.
49 # This might be costly to invoke since this produces a FlatString concatenation.
50 # Can be used internally to limit the growth of the Rope when working with small leaves.
51 fun to_leaf: Leaf is abstract
52 end
53
54 # Node that represents a concatenation between two nodes (of any RopeNode type)
55 private class Concat
56 super RopeNode
57
58 # Left child of the node
59 var left: nullable RopeNode
60 # Right child of the node
61 var right: nullable RopeNode
62
63 init(l: nullable RopeNode, r: nullable RopeNode)
64 do
65 left = l
66 right = r
67 if l != null then length += l.length
68 if r != null then length += r.length
69 end
70
71 redef fun to_leaf
72 do
73 if left == null then
74 if right == null then return new StringLeaf("".as(FlatString))
75 return right.to_leaf
76 end
77 if right == null then return left.as(not null).to_leaf
78 return new StringLeaf((left.to_leaf.str.as(FlatString) + right.to_leaf.str.as(FlatString)).as(FlatString))
79 end
80 end
81
82 # Leaf of a Rope, contains a FlatString
83 private abstract class Leaf
84 super RopeNode
85
86 # Encapsulated FlatString in the leaf node
87 var str: FlatText
88
89 end
90
91 private class StringLeaf
92 super Leaf
93
94 init(val: FlatString) do
95 self.str = val
96 length = str.length
97 end
98
99 redef fun to_leaf do return self
100 end
101
102 # Basic structure, binary tree with a root node.
103 #
104 # Also shared services by subsequent implementations.
105 abstract class Rope
106 super Text
107
108 # Root node, entry point of a Rope.
109 private var root: RopeNode
110
111 # Cached version of self as a flat String
112 private var str_representation: nullable NativeString = null
113
114 # Empty Rope
115 init do from("")
116
117 # Creates a new Rope with `s` as root
118 init from(s: String) do
119 if s isa RopeString then root = s.root else root = new StringLeaf(s.as(FlatString))
120 end
121
122 private init from_root(r: RopeNode)
123 do
124 root = r
125 end
126
127 redef fun length do return root.length
128
129 # Iterator on the nodes of the rope, in forward postfix order
130 private fun postfix(from: Int): Postfix do return new Postfix.from(self, from)
131
132 # Iterator on the leaves of the rope, forward order
133 private fun leaves(from: Int): LeavesIterator do return new LeavesIterator(self, from)
134
135 # Iterator on the substrings from 0, in forward order
136 redef fun substrings do return new SubstringsIterator(self, 0)
137
138 # Iterator on the substrings, starting at position `from`, in forward order
139 fun substrings_from(from: Int): IndexedIterator[Text] do return new SubstringsIterator(self, from)
140
141 # Iterator on the nodes of the rope, in backwards postfix order
142 private fun reverse_postfix(from: Int): ReversePostfix do return new ReversePostfix.from(self, from)
143
144 # Iterator on the leaves of the rope, backwards order
145 private fun reverse_leaves(from: Int): ReverseLeavesIterator do return new ReverseLeavesIterator(self,from)
146
147 # Iterator on the substrings, in reverse order
148 fun reverse_substrings: IndexedIterator[Text] do return new ReverseSubstringsIterator(self, length-1)
149
150 # Iterator on the substrings, in reverse order, starting iteration at position `from`
151 fun reverse_substrings_from(from: Int): IndexedIterator[Text] do return new ReverseSubstringsIterator(self, from)
152
153 redef fun output
154 do
155 for i in substrings do
156 i.output
157 end
158 end
159
160 redef fun to_cstring
161 do
162 if str_representation != null then return str_representation.as(not null)
163
164 var native_final_str = calloc_string(length + 1)
165
166 native_final_str[length] = '\0'
167
168 if self.length == 0 then
169 str_representation = native_final_str
170 return native_final_str
171 end
172
173 var offset = 0
174
175 for i in substrings do
176 var str = i.flatten
177 if str isa FlatString then str.items.copy_to(native_final_str, str.length, str.index_from, offset)
178 offset += i.length
179 end
180
181 str_representation = native_final_str
182
183 return native_final_str
184 end
185
186 # Path to the Leaf for `position`
187 private fun node_at(position: Int): Path
188 do
189 assert position >= 0 and position <= length
190 if position == length then
191 var st = new List[PathElement]
192 stack_to_end(root,st)
193 if not st.is_empty then
194 var lst = st.last
195 var lf = lst.node.right
196 if lf != null then
197 return new Path(lf.as(Leaf), lf.length, st)
198 else
199 lf = lst.node.left
200 return new Path(lf.as(Leaf), lf.length, st)
201 end
202 else
203 return new Path(root.as(Leaf), length, st)
204 end
205 end
206 return get_node_from(root, 0, position, new List[PathElement])
207 end
208
209 # Special case for when the required pos is length
210 private fun stack_to_end(nod: RopeNode, st: List[PathElement])
211 do
212 if nod isa Leaf then return
213 var n = nod.as(Concat)
214 var r = n.right
215 var ele = new PathElement(n)
216 ele.right = true
217 st.push(ele)
218 if r != null then
219 stack_to_end(r, st)
220 end
221 return
222 end
223
224 # Builds the path to Leaf at position `seek_pos`
225 private fun get_node_from(node: RopeNode, curr_pos: Int, seek_pos: Int, stack: List[PathElement]): Path
226 do
227 assert curr_pos >= 0
228 if node isa Leaf then return new Path(node, seek_pos - curr_pos, stack)
229 node = node.as(Concat)
230
231 if node.left != null then
232 var next_pos = curr_pos + node.left.length
233 stack.add(new PathElement(node))
234 if next_pos > seek_pos then
235 stack.last.left = true
236 return get_node_from(node.left.as(not null), curr_pos, seek_pos, stack)
237 end
238 stack.last.right = true
239 return get_node_from(node.right.as(not null), next_pos, seek_pos, stack)
240 else
241 var vis = new PathElement(node)
242 vis.right = true
243 stack.add(vis)
244 return get_node_from(node.right.as(not null), curr_pos, seek_pos, stack)
245 end
246 end
247
248 redef fun ==(o)
249 do
250 if not o isa Text then return false
251 if o.length != self.length then return false
252 var oit = o.chars.iterator
253 for i in self.chars.iterator do
254 if i != oit.item then return false
255 oit.next
256 end
257 return true
258 end
259 end
260
261 # Rope that cannot be modified
262 class RopeString
263 super Rope
264 super String
265
266 redef fun to_s do return self
267
268 redef fun empty do return once new RopeString.from("")
269
270 redef var chars: SequenceRead[Char] = new RopeStringChars(self)
271
272 redef fun reversed
273 do
274 var ret = empty
275 for i in substrings do
276 ret = i.as(String).reversed + ret
277 end
278 return ret
279 end
280
281 redef fun to_upper
282 do
283 var ret = empty
284 for i in substrings do
285 ret += i.as(String).to_upper
286 end
287 return ret
288 end
289
290 redef fun to_lower
291 do
292 var ret = empty
293 for i in substrings do
294 ret += i.as(String).to_lower
295 end
296 return ret
297 end
298
299 redef fun +(o) do return insert_at(o.to_s, length)
300
301 redef fun *(n)
302 do
303 var ret = new RopeString.from("")
304 for i in [0..n[ do
305 ret = (ret + self).as(RopeString)
306 end
307 return ret
308 end
309
310 # Inserts a String `str` at position `pos`
311 redef fun insert_at(str, pos)
312 do
313 if str.length == 0 then return self
314 if self.length == 0 then return new RopeString.from(str)
315
316 assert pos >= 0 and pos <= length
317
318 var path = node_at(pos)
319
320 var last_concat: Concat
321
322 if path.offset == 0 then
323 if str isa FlatString then
324 last_concat = new Concat(new StringLeaf(str), path.leaf)
325 else
326 last_concat = new Concat(str.as(RopeString).root, path.leaf)
327 end
328 else if path.offset == path.leaf.length then
329 if str isa FlatString then
330 last_concat = new Concat(path.leaf, new StringLeaf(str))
331 else
332 last_concat = new Concat(path.leaf, str.as(RopeString).root)
333 end
334 else
335 var s = path.leaf.str
336 var l_half = s.substring(0, s.length - path.offset)
337 var r_half = s.substring_from(s.length - path.offset)
338 var cct: Concat
339 var ll = new StringLeaf(l_half.as(FlatString))
340 if str isa FlatString then
341 cct = new Concat(ll, new StringLeaf(str))
342 else
343 cct = new Concat(ll, str.as(RopeString).root)
344 end
345 last_concat = new Concat(cct, new StringLeaf(r_half.as(FlatString)))
346 end
347
348 for i in path.stack.reverse_iterator do
349 if i.left then
350 last_concat = new Concat(last_concat, i.node.right)
351 else
352 last_concat = new Concat(i.node.left, last_concat)
353 end
354 end
355
356 return new RopeString.from_root(last_concat)
357 end
358
359 # O(log(n))
360 #
361 # var rope = new RopeString.from("abcd")
362 # assert rope.substring(1, 2) == "bc"
363 # assert rope.substring(-1, 2) == "a"
364 # assert rope.substring(1, 0) == ""
365 # assert rope.substring(2, 5) == "cd"
366 #
367 redef fun substring(pos, len)
368 do
369 if pos < 0 then
370 len += pos
371 pos = 0
372 end
373
374 if pos + len > length then len = length - pos
375
376 if len <= 0 then return new RopeString.from("")
377
378 var path = node_at(pos)
379
380 var lf = path.leaf
381 var offset = path.offset
382
383 if path.leaf.str.length - offset > len then lf = new StringLeaf(lf.str.substring(offset,len).as(FlatString)) else lf = new StringLeaf(lf.str.substring_from(offset).as(FlatString))
384
385 var nod: RopeNode = lf
386
387 for i in path.stack.reverse_iterator do
388 if i.right then continue
389 nod = new Concat(nod, i.node.right)
390 end
391
392 var ret = new RopeString
393 ret.root = nod
394
395 path = ret.node_at(len-1)
396
397 offset = path.offset
398 nod = new StringLeaf(path.leaf.str.substring(0, offset+1).as(FlatString))
399
400 for i in path.stack.reverse_iterator do
401 if i.left then continue
402 nod = new Concat(i.node.left, nod)
403 end
404
405 ret.root = nod
406
407 return ret
408 end
409 end
410
411 redef class FlatString
412
413 redef fun insert_at(s, pos)
414 do
415
416 if pos == 0 then
417 var r = new RopeString.from(s)
418 return r + self
419 end
420 if pos == length then
421 var r = new RopeString.from(self)
422 return r + s
423 end
424
425 var l = substring(0,pos)
426 var r = substring_from(pos)
427 var ret: String = new RopeString.from(l)
428 ret += s
429 return ret + r
430 end
431
432 end
433
434 private class RopeStringChars
435 super SequenceRead[Char]
436
437 var tgt: Rope
438
439 redef fun [](pos)
440 do
441 assert pos < tgt.length
442 var path = tgt.node_at(pos)
443 return path.leaf.str.chars[path.offset]
444 end
445
446 redef fun iterator do return iterator_from(0)
447
448 redef fun iterator_from(pos) do return new RopeCharIterator(tgt, pos)
449
450 redef fun reverse_iterator do return reverse_iterator_from(tgt.length-1)
451
452 redef fun reverse_iterator_from(pos) do return new ReverseRopeCharIterator(tgt, pos)
453 end
454
455 # Used to iterate on a Rope
456 private class IteratorElement
457
458 init(e: RopeNode)
459 do
460 if e isa Leaf then
461 left = true
462 right = true
463 end
464 node = e
465 end
466
467 # The node being visited
468 var node: RopeNode
469 # If the node has a left child, was it visited ?
470 var left = false
471 # If the node has a right child, was it visited ?
472 var right = false
473 # Was the current node visited ?
474 var done = false
475 end
476
477 # Simple Postfix iterator on the nodes of a Rope
478 private class Postfix
479 super IndexedIterator[RopeNode]
480
481 # Target Rope to iterate on
482 var target: Rope
483
484 # Current position in Rope
485 var pos: Int
486
487 # Visited nodes
488 var stack = new List[IteratorElement]
489
490 init from(tgt: Rope, pos: Int)
491 do
492 self.target = tgt
493 self.pos = pos
494 if pos < 0 or pos >= tgt.length then return
495 var path = tgt.node_at(pos)
496 self.pos -= path.offset
497 for i in path.stack do
498 var item = new IteratorElement(i.node)
499 item.left = true
500 if i.right then item.right = true
501 stack.push item
502 end
503 var item = new IteratorElement(path.leaf)
504 item.done = true
505 stack.push item
506 end
507
508 redef fun item
509 do
510 assert is_ok
511 return stack.last.node
512 end
513
514 redef fun is_ok do return not stack.is_empty
515
516 redef fun index do return pos
517
518 redef fun next do
519 if stack.is_empty then return
520 if pos > target.length-1 then
521 stack.clear
522 return
523 end
524 var lst = stack.last
525 if lst.done then
526 if lst.node isa Leaf then
527 pos += lst.node.length
528 end
529 stack.pop
530 next
531 return
532 end
533 if not lst.left then
534 lst.left = true
535 var nod = lst.node
536 if nod isa Concat and nod.left != null then
537 stack.push(new IteratorElement(nod.left.as(not null)))
538 next
539 return
540 end
541 end
542 if not lst.right then
543 lst.right = true
544 var nod = lst.node
545 if nod isa Concat and nod.right != null then
546 stack.push(new IteratorElement(nod.right.as(not null)))
547 next
548 return
549 end
550 end
551 lst.done = true
552 end
553 end
554
555 # Iterates on the leaves (substrings) of the Rope
556 class LeavesIterator
557 super IndexedIterator[Leaf]
558
559 private var nodes: Postfix
560
561 init(tgt: Rope, pos: Int)
562 do
563 nodes = tgt.postfix(pos)
564 end
565
566 redef fun is_ok do return nodes.is_ok
567
568 redef fun item
569 do
570 assert is_ok
571 return nodes.item.as(Leaf)
572 end
573
574 redef fun index do return nodes.index
575
576 redef fun next
577 do
578 while nodes.is_ok do
579 nodes.next
580 if nodes.is_ok and nodes.item isa Leaf then break
581 end
582 end
583 end
584
585 # Uses the leaves and calculates a new substring on each iteration
586 class SubstringsIterator
587 super IndexedIterator[Text]
588
589 private var nodes: IndexedIterator[Leaf]
590
591 # Current position in Rope
592 var pos: Int
593
594 # Current substring, computed from the current Leaf and indexes
595 var substring: Text
596
597 init(tgt: Rope, pos: Int)
598 do
599 nodes = tgt.leaves(pos)
600 self.pos = pos
601 if pos < 0 or pos >= tgt.length then return
602 make_substring
603 end
604
605 # Compute the bounds of the current substring and makes the substring
606 private fun make_substring
607 do
608 substring = nodes.item.str
609 var min = 0
610 var length = substring.length
611 if nodes.index < pos then
612 min = pos - nodes.index
613 end
614 substring = substring.substring(min, length)
615 end
616
617 redef fun is_ok do return nodes.is_ok
618
619 redef fun item
620 do
621 assert is_ok
622 return substring
623 end
624
625 redef fun index do return pos
626
627 redef fun next
628 do
629 pos += substring.length
630 nodes.next
631 if nodes.is_ok then make_substring
632 end
633
634 end
635
636 class RopeCharIterator
637 super IndexedIterator[Char]
638
639 var substrings: IndexedIterator[Text]
640
641 var pos: Int
642
643 var max: Int
644
645 var substr_iter: IndexedIterator[Char]
646
647 init(tgt: Rope, from: Int)
648 do
649 substrings = tgt.substrings_from(from)
650 max = tgt.length - 1
651 if not substrings.is_ok then
652 pos = tgt.length
653 return
654 end
655 pos = from
656 substr_iter = substrings.item.chars.iterator
657 end
658
659 redef fun item do return substr_iter.item
660
661 redef fun is_ok do return pos <= max
662
663 redef fun index do return pos
664
665 redef fun next
666 do
667 pos += 1
668 if substr_iter.is_ok then
669 substr_iter.next
670 end
671 if not substr_iter.is_ok then
672 substrings.next
673 if substrings.is_ok then
674 substr_iter = substrings.item.chars.iterator
675 end
676 end
677 end
678 end
679
680 private class ReversePostfix
681 super IndexedIterator[RopeNode]
682
683 var target: Rope
684
685 var pos: Int
686
687 var min = 0
688
689 var stack = new List[IteratorElement]
690
691 init from(tgt: Rope, pos: Int)
692 do
693 self.pos = pos
694 target = tgt
695 if pos < 0 or pos >= tgt.length then return
696 var path = tgt.node_at(pos)
697 self.pos -= path.offset
698 for i in path.stack do
699 var elemt = new IteratorElement(i.node)
700 elemt.right = true
701 if i.left then
702 elemt.left = true
703 end
704 stack.push elemt
705 end
706 stack.push(new IteratorElement(path.leaf))
707 stack.last.done = true
708 end
709
710 redef fun item do
711 assert is_ok
712 return stack.last.node
713 end
714
715 redef fun is_ok do return not stack.is_empty
716
717 redef fun index do return pos
718
719 redef fun next
720 do
721 if stack.is_empty then return
722 if pos < min then
723 stack.clear
724 return
725 end
726 var lst = stack.last
727 if lst.done then
728 stack.pop
729 next
730 return
731 end
732 if not lst.right then
733 var nod = lst.node.as(Concat)
734 var rgt = nod.right
735 lst.right = true
736 if rgt != null then
737 stack.push(new IteratorElement(rgt))
738 next
739 return
740 end
741 end
742 if not lst.left then
743 var nod = lst.node.as(Concat)
744 var lft = nod.left
745 lst.left = true
746 if lft != null then
747 stack.push(new IteratorElement(lft))
748 next
749 return
750 end
751 end
752 if lst.node isa Leaf then pos -= lst.node.length
753 lst.done = true
754 end
755 end
756
757 private class ReverseLeavesIterator
758 super IndexedIterator[Leaf]
759
760 var nodes: ReversePostfix
761
762 init(tgt: Rope, from: Int)
763 do
764 nodes = tgt.reverse_postfix(from)
765 end
766
767 redef fun is_ok do return nodes.is_ok
768
769 redef fun item do
770 assert is_ok
771 return nodes.item.as(Leaf)
772 end
773
774 redef fun next do
775 while nodes.is_ok do
776 nodes.next
777 if nodes.is_ok then if nodes.item isa Leaf then break
778 end
779 end
780
781 redef fun index do return nodes.index
782
783 end
784
785 private class ReverseSubstringsIterator
786 super IndexedIterator[Text]
787
788 var leaves: ReverseLeavesIterator
789
790 var pos: Int
791
792 var str: Text
793
794 init(tgt: Rope, from: Int)
795 do
796 leaves = tgt.reverse_leaves(from)
797 pos = from
798 if not leaves.is_ok then return
799 str = leaves.item.str
800 make_substring
801 end
802
803 fun make_substring
804 do
805 if pos >= (leaves.index + str.length - 1) then return
806 str = str.substring(0, (pos - leaves.index + 1))
807 end
808
809 redef fun is_ok do return leaves.is_ok
810
811 redef fun item do
812 assert is_ok
813 return str
814 end
815
816 redef fun index do return pos
817
818 redef fun next do
819 pos -= str.length
820 leaves.next
821 if not leaves.is_ok then return
822 str = leaves.item.str
823 make_substring
824 end
825 end
826
827 private class ReverseRopeCharIterator
828 super IndexedIterator[Char]
829
830 var substrs: IndexedIterator[Text]
831
832 var pos: Int
833
834 var subiter: IndexedIterator[Char]
835
836 init(tgt: Rope, from: Int)
837 do
838 substrs = tgt.reverse_substrings_from(from)
839 if not substrs.is_ok then
840 pos = -1
841 return
842 end
843 subiter = substrs.item.chars.reverse_iterator
844 pos = from
845 end
846
847 redef fun is_ok do return pos >= 0
848
849 redef fun item do
850 assert is_ok
851 return subiter.item
852 end
853
854 redef fun index do return pos
855
856 redef fun next do
857 pos -= 1
858 if subiter.is_ok then subiter.next
859 if not subiter.is_ok then
860 if substrs.is_ok then substrs.next
861 if substrs.is_ok then subiter = substrs.item.chars.reverse_iterator
862 end
863 end
864 end
865