b76784f34bf094f6b217e20a20fc1d98ad3b7f65
[nit.git] / lib / standard / ropes.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it if you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or as a part of
9 # another product.
10
11 # Nit implementation of the Ropes (see Ropes : An Alternative to Strings,
12 # SOFTWARE - PRACTICE AND EXPERIENCE, VOL. 25(12), 1315 - 1330 (DECEMBER 1995)
13 # Hans. J Boehm, Russ Atkinson and Michael Plass)
14 #
15 # A rope is a kind of string but instead of being flat, it relies on a binary tree structure to store data.
16 module ropes
17
18 intrude import string
19
20 # Used when searching for a particular node
21 # Returns the path to the node from the root of the rope
22 # Also, the node and the offset for seeked position in the rope
23 private class Path
24 # Leaf found
25 var leaf: Leaf
26 # Offset in leaf
27 var offset: Int
28 # Stack of the nodes traversed, and the path used
29 var stack: List[PathElement]
30 end
31
32 # An element for a Path, has the concat node and whether or not
33 # left or right child was visited.
34 private class PathElement
35 # Visited node
36 var node: Concat
37 # Was the left child visited ?
38 var left = false
39 # Was the right child visited ?
40 var right = false
41 end
42
43 # A node for a Rope
44 private abstract class RopeNode
45 # Length of the node
46 var length = 0
47 end
48
49 # Node that represents a concatenation between two nodes (of any RopeNode type)
50 private class Concat
51 super RopeNode
52
53 # Left child of the node
54 var _left: nullable RopeNode = null
55 # Right child of the node
56 var _right: nullable RopeNode = null
57
58 fun left: nullable RopeNode do return _left
59 fun right: nullable RopeNode do return _right
60
61 fun left=(l: RopeNode)
62 do
63 _left = l
64 length = l.length
65 if _right != null then length += _right.length
66 end
67
68 fun right=(r: RopeNode)
69 do
70 _right = r
71 length = r.length
72 if _left != null then length += _left.length
73 end
74 end
75
76 # Leaf of a Rope, contains a FlatString
77 private class Leaf
78 super RopeNode
79
80 # Encapsulated FlatString in the leaf node
81 var str: FlatString
82
83 init(val: FlatString) do
84 self.str = val
85 length = str.length
86 end
87
88 end
89
90 # Basic structure, binary tree with a root node.
91 #
92 # Also shared services by subsequent implementations.
93 abstract class Rope
94 super Text
95
96 # Root node, entry point of a Rope.
97 private var root: RopeNode
98
99 # Empty Rope
100 init do from("")
101
102 # Creates a new Rope with `s` as root
103 init from(s: String) do
104 if s isa RopeString then root = s.root else root = new Leaf(s.as(FlatString))
105 end
106
107 private init from_root(r: RopeNode)
108 do
109 root = r
110 end
111
112 redef fun length do return root.length
113
114 # Iterator on the nodes of the rope, in forward postfix order
115 private fun postfix(from: Int): Postfix do return new Postfix.from(self, from)
116
117 # Iterator on the leaves of the rope, forward order
118 private fun leaves(from: Int): LeavesIterator do return new LeavesIterator(self, from)
119
120 # Path to the Leaf for `position`
121 private fun node_at(position: Int): Path
122 do
123 assert position >= 0 and position < length
124 return get_node_from(root.as(not null), 0, position, new List[PathElement])
125 end
126
127 # Builds the path to Leaf at position `seek_pos`
128 private fun get_node_from(node: RopeNode, curr_pos: Int, seek_pos: Int, stack: List[PathElement]): Path
129 do
130 assert curr_pos >= 0
131 if node isa Leaf then return new Path(node, seek_pos - curr_pos, stack)
132 node = node.as(Concat)
133
134 if node.left != null then
135 var next_pos = curr_pos + node.left.length
136 stack.add(new PathElement(node))
137 if next_pos > seek_pos then
138 stack.last.left = true
139 return get_node_from(node.left.as(not null), curr_pos, seek_pos, stack)
140 end
141 stack.last.right = true
142 return get_node_from(node.right.as(not null), next_pos, seek_pos, stack)
143 else
144 var vis = new PathElement(node)
145 vis.right = true
146 stack.add(vis)
147 return get_node_from(node.right.as(not null), curr_pos, seek_pos, stack)
148 end
149 end
150
151 end
152
153 # Rope that cannot be modified
154 class RopeString
155 super Rope
156 super String
157
158 redef fun to_s do return self
159
160 # Inserts a String `str` at position `pos`
161 fun insert_at(str: String, pos: Int): RopeString
162 do
163 if str.length == 0 then return self
164 if self.length == 0 then return new RopeString.from(str)
165
166 assert pos >= 0 and pos <= length
167
168 if pos == length then return append(str).as(RopeString)
169
170 var path = node_at(pos)
171
172 var last_concat = new Concat
173
174 if path.offset == 0 then
175 last_concat.right = path.leaf
176 if str isa FlatString then last_concat.left = new Leaf(str) else last_concat.left = str.as(RopeString).root
177 else if path.offset == path.leaf.length then
178 if str isa FlatString then last_concat.right = new Leaf(str) else last_concat.right = str.as(RopeString).root
179 last_concat.left = path.leaf
180 else
181 var s = path.leaf.str
182 var l_half = s.substring(0, s.length - path.offset)
183 var r_half = s.substring_from(s.length - path.offset)
184 var cct = new Concat
185 cct.right = new Leaf(r_half)
186 last_concat.left = new Leaf(l_half)
187 if str isa FlatString then last_concat.right = new Leaf(str) else last_concat.right = str.as(RopeString).root
188 cct.left = last_concat
189 last_concat = cct
190 end
191
192 for i in path.stack.reverse_iterator do
193 var nod = new Concat
194 if i.left then
195 nod.right = i.node.right.as(not null)
196 nod.left = last_concat
197 else
198 nod.left = i.node.left.as(not null)
199 nod.right = last_concat
200 end
201 last_concat = nod
202 end
203
204 return new RopeString.from_root(last_concat)
205 end
206
207 # Adds `s` at the end of self
208 fun append(s: String): String
209 do
210 if self.is_empty then return s
211 return new RopeString.from_root(append_to_path(root,s))
212 end
213
214 # Builds a new path from root to the rightmost node with s appended
215 private fun append_to_path(node: RopeNode, s: String): RopeNode
216 do
217 var cct = new Concat
218 if node isa Leaf then
219 cct.left = node
220 if s isa FlatString then cct.right = new Leaf(s) else cct.right = s.as(RopeString).root
221 else if node isa Concat then
222 var right = node.right
223 if node.left != null then cct.left = node.left.as(not null)
224 if right == null then
225 if s isa FlatString then cct.right = new Leaf(s) else cct.right = s.as(RopeString).root
226 else
227 cct.right = append_to_path(right, s)
228 end
229 end
230 return cct
231 end
232
233 # O(log(n))
234 #
235 # var rope = new RopeString.from("abcd")
236 # assert rope.substring(1, 2) == "bc"
237 # assert rope.substring(-1, 2) == "a"
238 # assert rope.substring(1, 0) == ""
239 # assert rope.substring(2, 5) == "cd"
240 #
241 redef fun substring(pos, len)
242 do
243 if pos < 0 then
244 len += pos
245 pos = 0
246 end
247
248 if pos + len > length then len = length - pos
249
250 if len <= 0 then return new RopeString.from("")
251
252 var path = node_at(pos)
253
254 var lf = path.leaf
255 var offset = path.offset
256
257 if path.leaf.str.length - offset > len then lf = new Leaf(lf.str.substring(offset,len)) else lf = new Leaf(lf.str.substring_from(offset))
258
259 var nod: RopeNode = lf
260
261 for i in path.stack.reverse_iterator do
262 if i.right then continue
263 var tmp = new Concat
264 tmp.left = nod
265 var r = i.node.right
266 if r != null then tmp.right = r
267 nod = tmp
268 end
269
270 var ret = new RopeString
271 ret.root = nod
272
273 path = ret.node_at(len-1)
274
275 offset = path.offset
276 nod = new Leaf(path.leaf.str.substring(0, offset+1))
277
278 for i in path.stack.reverse_iterator do
279 if i.left then continue
280 var tmp = new Concat
281 tmp.right = nod
282 var l = i.node.left
283 if l != null then tmp.left = l
284 nod = tmp
285 end
286
287 ret.root = nod
288
289 return ret
290 end
291 end
292
293 # Used to iterate on a Rope
294 private class IteratorElement
295
296 init(e: RopeNode)
297 do
298 if e isa Leaf then
299 left = true
300 right = true
301 end
302 node = e
303 end
304
305 # The node being visited
306 var node: RopeNode
307 # If the node has a left child, was it visited ?
308 var left = false
309 # If the node has a right child, was it visited ?
310 var right = false
311 # Was the current node visited ?
312 var done = false
313 end
314
315 # Simple Postfix iterator on the nodes of a Rope
316 private class Postfix
317 super IndexedIterator[RopeNode]
318
319 # Target Rope to iterate on
320 var target: Rope
321
322 # Current position in Rope
323 var pos: Int
324
325 # Visited nodes
326 var stack = new List[IteratorElement]
327
328 init from(tgt: Rope, pos: Int)
329 do
330 self.target = tgt
331 self.pos = pos
332 if pos < 0 or pos >= tgt.length then return
333 var path = tgt.node_at(pos)
334 self.pos -= path.offset
335 for i in path.stack do
336 var item = new IteratorElement(i.node)
337 item.left = true
338 if i.right then item.right = true
339 stack.push item
340 end
341 var item = new IteratorElement(path.leaf)
342 item.done = true
343 stack.push item
344 end
345
346 redef fun item
347 do
348 assert is_ok
349 return stack.last.node
350 end
351
352 redef fun is_ok do return not stack.is_empty
353
354 redef fun index do return pos
355
356 redef fun next do
357 if stack.is_empty then return
358 if pos > target.length-1 then
359 stack.clear
360 return
361 end
362 var lst = stack.last
363 if lst.done then
364 if lst.node isa Leaf then
365 pos += lst.node.length
366 end
367 stack.pop
368 next
369 return
370 end
371 if not lst.left then
372 lst.left = true
373 var nod = lst.node
374 if nod isa Concat and nod.left != null then
375 stack.push(new IteratorElement(nod.left.as(not null)))
376 next
377 return
378 end
379 end
380 if not lst.right then
381 lst.right = true
382 var nod = lst.node
383 if nod isa Concat and nod.right != null then
384 stack.push(new IteratorElement(nod.right.as(not null)))
385 next
386 return
387 end
388 end
389 lst.done = true
390 end
391 end
392
393 # Iterates on the leaves (substrings) of the Rope
394 class LeavesIterator
395 super IndexedIterator[Leaf]
396
397 private var nodes: Postfix
398
399 init(tgt: Rope, pos: Int)
400 do
401 nodes = tgt.postfix(pos)
402 end
403
404 redef fun is_ok do return nodes.is_ok
405
406 redef fun item
407 do
408 assert is_ok
409 return nodes.item.as(Leaf)
410 end
411
412 redef fun index do return nodes.index
413
414 redef fun next
415 do
416 while nodes.is_ok do
417 nodes.next
418 if nodes.is_ok and nodes.item isa Leaf then break
419 end
420 end
421 end
422