lib/string_exp/utf8: Implemented to_upper/to_lower on UnicodeChar.
[nit.git] / lib / bufferized_ropes.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it if you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or as a part of
9 # another product.
10
11 # Introduces ropes with buffered leaves
12 module bufferized_ropes
13
14 import standard
15 intrude import standard::ropes
16
17 # Leaf containing a FlatBuffer to optimize concatenation operations
18 private class BufferLeaf
19 super Leaf
20
21 init(val: FlatBuffer) do
22 self.str = val
23 length = str.length
24 end
25
26 end
27
28 redef class Concat
29 redef fun to_leaf
30 do
31 if left == null then
32 if right == null then return new StringLeaf("".as(FlatString))
33 return right.to_leaf
34 end
35 if right == null then return left.as(not null).to_leaf
36 if left.length + right.length < buf_len then
37 var b = new FlatBuffer.with_capacity(buf_len)
38 b.append(left.to_leaf.str)
39 b.append(right.to_leaf.str)
40 return new BufferLeaf(b)
41 else
42 var b = new FlatBuffer.with_capacity(left.length + right.length)
43 b.append(left.to_leaf.str)
44 b.append(right.to_leaf.str)
45 return new StringLeaf(b.lazy_to_s(b.length))
46 end
47 end
48 end
49
50 redef class FlatText
51
52 # Creates a substring, only without any copy overhead for Buffers
53 # The call to lazy_substring ensures the creation of a FlatString, which is required for Leaves.
54 private fun lazy_substring(from: Int, length: Int): FlatString is abstract
55
56 # Same as substring_from, but without copy of the data for Buffers.
57 private fun lazy_substring_from(from: Int): FlatString is abstract
58 end
59
60 redef class FlatBuffer
61
62 # Same as to_s, only will not copy self before returning a String.
63 private fun lazy_to_s(len: Int): FlatString
64 do
65 return new FlatString.with_infos(items, len, 0, length - 1)
66 end
67
68 redef fun lazy_substring(from,length)
69 do
70 return new FlatString.with_infos(items, length, from, from + length - 1)
71 end
72
73 redef fun lazy_substring_from(from)
74 do
75 var newlen = length - from
76 return new FlatString.with_infos(items, newlen, from, from + newlen - 1)
77 end
78
79 end
80
81 redef class FlatString
82
83 redef fun lazy_substring(from, len) do return substring(from,len).as(FlatString)
84
85 redef fun lazy_substring_from(from) do return substring_from(from).as(FlatString)
86
87 end
88
89 redef class Rope
90
91 # Empty Rope
92 init do root = new BufferLeaf(new FlatBuffer.with_capacity(buf_len))
93
94 end
95
96 redef class RopeString
97
98 init from(s: String) do
99 if s.length < buf_len then
100 var b = new FlatBuffer.with_capacity(buf_len)
101 b.append(s)
102 root = new BufferLeaf(b)
103 else
104 if s isa RopeString then root = s.root else root = new StringLeaf(s.as(FlatString))
105 end
106 end
107
108 # Inserts a String `str` at position `pos`
109 redef fun insert_at(str, pos)
110 do
111 if str.length == 0 then return self
112
113 assert pos >= 0 and pos <= length
114
115 if pos == length then
116 var r = root
117 if r isa BufferLeaf then
118 var b = r.str.as(FlatBuffer)
119 if r.length + str.length < b.capacity then
120 b.append(str)
121 return new RopeString.from_root(new BufferLeaf(b))
122 end
123 end
124 end
125
126 var path = node_at(pos)
127
128 var cct: RopeNode
129
130 if path.offset == path.leaf.length then
131 cct = build_node_len_offset(path, str)
132 else if path.offset == 0 then
133 cct = build_node_zero_offset(path, str)
134 else
135 cct = build_node_other(path,str)
136 end
137
138 if path.stack.is_empty then return new RopeString.from_root(cct)
139
140 var tmp = path.stack.pop
141 var last_concat: Concat
142
143 if tmp.left then
144 last_concat = new Concat(cct,tmp.node.right.as(not null))
145 else
146 last_concat = new Concat(tmp.node.left.as(not null), cct)
147 end
148
149 for i in path.stack.reverse_iterator do
150 var nod: Concat
151 if i.left then
152 nod = new Concat(last_concat, i.node.right.as(not null))
153 else
154 nod = new Concat(i.node.left.as(not null), last_concat)
155 end
156 last_concat = nod
157 end
158
159 return new RopeString.from_root(last_concat)
160 end
161
162 redef fun substring(pos, len)
163 do
164 if pos < 0 then
165 len += pos
166 pos = 0
167 end
168
169 if pos + len > length then len = length - pos
170
171 if len <= 0 then return new RopeString
172
173 var path = node_at(pos)
174
175 var lf = path.leaf
176 var offset = path.offset
177
178 var s: FlatString
179 if lf isa StringLeaf then
180 s = lf.str.as(FlatString)
181 else
182 s = lf.str.as(FlatBuffer).lazy_to_s(lf.length)
183 end
184
185 if path.leaf.str.length - offset > len then
186 lf = new StringLeaf(s.substring(offset,len).as(FlatString))
187 else
188 lf = new StringLeaf(s.substring_from(offset).as(FlatString))
189 end
190
191 var nod: RopeNode = lf
192
193 if lf.length == len then return new RopeString.from_root(lf)
194
195 var lft: nullable RopeNode
196 var rht: nullable RopeNode
197
198 for i in path.stack.reverse_iterator do
199 if i.right then continue
200 lft = nod
201 rht = i.node.right
202 nod = new Concat(lft, rht)
203 end
204
205 var ret = new RopeString
206 ret.root = nod
207
208 path = ret.node_at(len-1)
209
210 offset = path.offset
211
212 lf = path.leaf
213
214 if lf isa StringLeaf then
215 s = lf.str.as(FlatString)
216 else
217 s = lf.str.as(FlatBuffer).lazy_to_s(lf.length)
218 end
219
220 nod = new StringLeaf(s.substring(0, offset+1).as(FlatString))
221
222 for i in path.stack.reverse_iterator do
223 if i.left then continue
224 rht = nod
225 lft = i.node.left
226 nod = new Concat(lft, rht)
227 end
228
229 ret.root = nod
230
231 return ret
232 end
233
234 private fun build_node_zero_offset(path: Path, s: String): RopeNode
235 do
236 var finlen = path.leaf.length + s.length
237 if finlen <= buf_len then
238 var b = new FlatBuffer.with_capacity(buf_len)
239 b.append(s)
240 b.append(path.leaf.str)
241 if finlen == buf_len then return new StringLeaf(b.lazy_to_s(finlen))
242 return new BufferLeaf(b)
243 end
244 var rht = path.leaf
245 var lft: RopeNode
246 if s isa FlatString then
247 if s.length > buf_len then
248 lft = new StringLeaf(s)
249 else
250 var b = new FlatBuffer
251 b.append(s)
252 lft = new BufferLeaf(b)
253 end
254 else
255 lft = s.as(RopeString).root
256 end
257 return new Concat(lft, rht)
258 end
259
260 private fun build_node_len_offset(path: Path, s: String): RopeNode
261 do
262 var leaf = path.leaf
263 if leaf isa BufferLeaf then
264 if s.length > buf_len then
265 if s isa FlatString then
266 return new Concat(leaf, new StringLeaf(s))
267 else
268 return new Concat(leaf, s.as(Rope).root)
269 end
270 end
271 var finlen = leaf.length + s.length
272 var buf = leaf.str.as(FlatBuffer)
273 var cap = buf.capacity
274 # Meaning the buffer was modified elsewhere
275 # Therefore, we create a new one
276 if leaf.length != buf.length then
277 var b = new FlatBuffer.with_capacity(buf_len)
278 b.append(buf.lazy_to_s(leaf.length))
279 buf = b
280 end
281 if finlen <= cap then
282 buf.append(s)
283 if finlen == buf_len then return new StringLeaf(buf.lazy_to_s(finlen))
284 return new BufferLeaf(buf)
285 else
286 var l_len = finlen - cap
287 buf.append(s.substring(0,l_len))
288 var b2 = new FlatBuffer.with_capacity(buf_len)
289 b2.append(s.substring_from(l_len))
290 var left_leaf = new StringLeaf(buf.lazy_to_s(buf.length))
291 var right_leaf = new BufferLeaf(b2)
292 var cct = new Concat(left_leaf, right_leaf)
293 return cct
294 end
295 else
296 var lft = leaf
297 var rht: RopeNode
298 if s.length >= buf_len then
299 if s isa FlatString then rht = new StringLeaf(s) else rht = s.as(Rope).root
300 else
301 var buf = new FlatBuffer.with_capacity(buf_len)
302 buf.append(s)
303 rht = new BufferLeaf(buf)
304 end
305 return new Concat(lft,rht)
306 end
307 end
308
309 private fun build_node_other(path: Path,str: String): RopeNode
310 do
311 var lf = path.leaf
312 var s: FlatString
313 if lf isa BufferLeaf then
314 var b = lf.str.as(FlatBuffer)
315 s = b.lazy_to_s(lf.length)
316 else
317 s = lf.str.as(FlatString)
318 end
319 var l_str = s.lazy_substring(0, path.offset)
320 var r_str = s.lazy_substring_from(path.offset)
321 if s.length + str.length < buf_len then
322 var buf = new FlatBuffer.with_capacity(buf_len)
323 buf.append(l_str)
324 buf.append(str)
325 buf.append(r_str)
326 return new BufferLeaf(buf)
327 end
328 var child: RopeNode
329 if str isa FlatString then child = new StringLeaf(str) else child = str.as(Rope).root
330 var l_cct = new Concat(new StringLeaf(l_str), child)
331 var r_cct = new Concat(l_cct, new StringLeaf(r_str))
332 return r_cct
333 end
334
335 end
336
337 redef class SubstringsIterator
338
339 # Compute the bounds of the current substring and makes the substring
340 redef fun make_substring
341 do
342 var l = nodes.item
343 var s = l.str
344 var min = 0
345 var length = l.length
346 if nodes.index < pos then
347 min = pos - nodes.index
348 end
349 substring = s.lazy_substring(min, length)
350 end
351
352 end
353
354 redef class ReverseSubstringsIterator
355
356 redef fun make_substring
357 do
358 var l = leaves.item
359 var s = l.str
360 if pos > (leaves.index + l.length - 1) then return
361 str = s.lazy_substring(0, (pos - leaves.index + 1))
362 end
363
364 end
365
366 # Default size of a buffer in a rope leaf.
367 fun buf_len: Int do return 200
368