a2a80238c76c72f552c2b5f369d2355847cc726d
[nit.git] / lib / buffered_ropes.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # Ropes with a special kind of Leaves that act similar to a `Buffer`
12 #
13 # When using this module, re-allocations are limited by the introduction
14 # of a larger-than-necessary buffered area for the native part of a `String`
15 # in an append-only fashion.
16 #
17 # Concretely, when concatenating two small strings of length `n` + `m` < `maxlen`
18 # What happens is that a `maxlen` byte buffer is allocated, ready to receive more
19 # bytes a posteriori without necessarily reallocating a new byte array.
20 #
21 # Theoretically, this should lower the number of concatenations
22 # and reallocations when concatenating `String` objects.
23 module buffered_ropes
24
25 intrude import standard::text::ropes
26
27 # Hidden buffer, used to simulate a `FlatBuffer` on a short string.
28 #
29 # This is to be used by low-level APIs because of its lack of
30 # safety, if you use it, make sure you know what you are doing !
31 #
32 # Practically, it is the underlying representation of a `Leaf` in
33 # the `Rope` block, its advantage is that it saves a bit more space
34 # for future concatenations, without risking to overwrite previously
35 # used space, making it suitable for Strings.
36 #
37 # Note for future use : Should there be parallel capacity in Nit at
38 # some point, this is NOT thread safe !
39 private class ManualBuffer
40 var ns: NativeString is noinit
41 # Current position in the `NativeString`
42 #
43 # It is used by the clients of `ManualBuffer` as a guard
44 # to detect if the concatenation in the `ManualBuffer`
45 # is safe or not.
46 #
47 # i.e. :
48 # Say we have two strings `x` and `y` referencing the
49 # same `ManualBuffer` `b`, `y` is the concatenation of
50 # `x` and another string.
51 #
52 # If we try to concatenate a `String` `z` to `x`, a new
53 # `ManualBuffer` will be created since `pos` and `x.length`
54 # do not match.
55 #
56 # However, if we concatenate the same `String` to `y`,
57 # the contents of `z` will be copied to the `ManualBuffer`.
58 var pos = 0
59
60 init do ns = new NativeString(maxlen)
61
62 fun [](i: Int): Byte do return ns[i]
63 end
64
65 # Simple implementation of the iterator on Substrings for `Leaf`
66 #
67 # Basically just returns `self` encapsulated in a `FlatString`.
68 private class LeafSubstrings
69 super IndexedIterator[FlatText]
70
71 var leaf: Leaf
72 var str: FlatString is noinit
73 var avail = true
74
75 init do
76 str = new FlatString.with_infos(leaf.buf.ns, leaf.length, 0, leaf.length - 1)
77 end
78
79 redef fun is_ok do return avail
80
81 redef fun next do avail = false
82
83 redef fun index do return 0
84
85 redef fun item do return str
86 end
87
88 # Leaf of a `Rope`, used as a buffered area for speedy concatenation.
89 private class Leaf
90 super String
91 super Rope
92
93 var buf: ManualBuffer
94 var bns: NativeString is noinit
95 redef var length is noinit
96
97 redef fun empty do return new Leaf(new ManualBuffer)
98
99 redef fun to_cstring do
100 var len = length
101 var ns = new NativeString(len + 1)
102 ns[len] = 0u8
103 buf.ns.copy_to(ns, len, 0, 0)
104 return ns
105 end
106
107 redef fun substrings do return new LeafSubstrings(self)
108
109 redef fun [](i) do return buf[i].to_i.ascii
110
111 init do
112 bns = buf.ns
113 length = buf.pos
114 end
115
116 redef fun output do new FlatString.with_infos(buf.ns, length, 0, length - 1).output
117
118 redef fun to_upper do
119 var x = new FlatBuffer
120 for i in chars do x.add(i.to_upper)
121 return x.to_s
122 end
123
124 redef fun to_lower do
125 var x = new FlatBuffer
126 for i in chars do x.add(i.to_lower)
127 return x.to_s
128 end
129
130 redef fun reversed do
131 var x = new ManualBuffer
132 var nns = x.ns
133 var ns = bns
134 var mlen = length
135 var j = mlen - 1
136 for i in [0 .. mlen[ do
137 nns[j] = ns[i]
138 j -= 1
139 end
140 x.pos = mlen - 1
141 return new Leaf(x)
142 end
143
144 redef fun substring(from, len) do
145 return new FlatString.with_infos(buf.ns, len, from, from + len - 1)
146 end
147
148 redef fun insert_at(s, pos) do
149 var l = substring(0, pos)
150 var r = substring_from(pos)
151 return l + s + r
152 end
153
154 redef fun +(o) do
155 var s = o.to_s
156 var slen = s.bytelen
157 var mlen = bytelen
158 if slen == 0 then return self
159 if mlen == 0 then return s
160 var nlen = mlen + slen
161 if nlen > maxlen then return new Concat(self, s)
162 if s isa FlatString then
163 var bpos = buf.pos
164 var sits = s.items
165 if bpos == mlen then
166 sits.copy_to(buf.ns, slen, s.index_from, bpos)
167 buf.pos = bpos + slen
168 return new Leaf(buf)
169 else
170 var b = new ManualBuffer
171 var nbns = b.ns
172 bns.copy_to(nbns, mlen, 0, 0)
173 sits.copy_to(nbns, slen, s.index_from, mlen)
174 b.pos = nlen
175 return new Leaf(b)
176 end
177 else if s isa Leaf then
178 var bpos = buf.pos
179 var sbns = s.bns
180 if bpos == mlen then
181 sbns.copy_to(bns, slen, 0, bpos)
182 buf.pos += slen
183 return new Leaf(buf)
184 else
185 var b = new ManualBuffer
186 var nbns = b.ns
187 bns.copy_to(nbns, mlen, 0, 0)
188 sbns.copy_to(nbns, slen, 0, mlen)
189 b.pos = nlen
190 return new Leaf(b)
191 end
192 else if s isa Concat then
193 if not s.left isa Concat then
194 return new Concat(self + s.left, s.right)
195 end
196 return new Concat(self, s)
197 else
198 var bpos = buf.pos
199 var b = buf
200 if bpos != mlen then
201 b = new ManualBuffer
202 bns.copy_to(b.ns, mlen, 0, 0)
203 end
204 for i in s.bytes do
205 bns[bpos] = i
206 bpos += 1
207 end
208 return new Leaf(b)
209 end
210 end
211 end
212
213 redef class Concat
214 redef fun to_cstring do
215 var len = length
216 var ns = new NativeString(len + 1)
217 ns[len] = 0u8
218 var off = 0
219 for i in substrings do
220 var ilen = i.length
221 if i isa FlatString then
222 i.items.copy_to(ns, ilen, i.index_from, off)
223 else if i isa Leaf then
224 i.buf.ns.copy_to(ns, ilen, 0, off)
225 else
226 abort
227 end
228 off += ilen
229 end
230 return ns
231 end
232
233 redef fun +(o) do
234 var s = o.to_s
235 var slen = s.length
236 if s isa FlatString then
237 var r = right
238 var rlen = r.length
239 if rlen + slen > maxlen then return new Concat(left, new Concat(r, s))
240 return new Concat(left, r + s)
241 else if s isa Concat then
242 return new Concat(self, s)
243 else if s isa Leaf then
244 var r = right
245 var rlen = r.length
246 if rlen + slen > maxlen then return new Concat(left, new Concat(r, s))
247 return new Concat(left, r + s)
248 else
249 abort
250 end
251 end
252 end
253
254 redef class FlatString
255 redef fun +(o) do
256 var s = o.to_s
257 var slen = s.length
258 var mlen = length
259 if slen == 0 then return self
260 if mlen == 0 then return s
261 if s isa FlatString then
262 if slen + mlen > maxlen then return new Concat(self, s)
263 var mits = items
264 var sifrom = s.index_from
265 var mifrom = index_from
266 var sits = s.items
267 var b = new ManualBuffer
268 var bns = b.ns
269 mits.copy_to(bns, mlen, mifrom, 0)
270 sits.copy_to(bns, slen, sifrom, mlen)
271 b.pos = mlen + slen
272 return new Leaf(b)
273 else if s isa Concat then
274 var sl = s.left
275 var sllen = sl.length
276 if sllen + mlen > maxlen then return new Concat(self, s)
277 return new Concat(sl + self, s.right)
278 else if s isa Leaf then
279 if slen + mlen > maxlen then return new Concat(self, s)
280 var mifrom = index_from
281 var sb = s.buf
282 var b = new ManualBuffer
283 var bns = b.ns
284 items.copy_to(bns, mlen, mifrom, 0)
285 sb.ns.copy_to(bns, slen, 0, mlen)
286 b.pos = mlen + slen
287 return new Leaf(b)
288 else
289 abort
290 end
291 end
292 end
293
294 redef class Array[E]
295
296 # Fast implementation
297 redef fun to_s do
298 var l = length
299 if l == 0 then return ""
300 if l == 1 then if self[0] == null then return "" else return self[0].to_s
301 var its = _items
302 var na = new NativeArray[String](l)
303 var i = 0
304 var sl = 0
305 var mypos = 0
306 while i < l do
307 var itsi = its[i]
308 if itsi == null then
309 i += 1
310 continue
311 end
312 var tmp = itsi.to_s
313 sl += tmp.length
314 na[mypos] = tmp
315 i += 1
316 mypos += 1
317 end
318 var ns = new NativeString(sl + 1)
319 ns[sl] = 0u8
320 i = 0
321 var off = 0
322 while i < mypos do
323 var tmp = na[i]
324 var tpl = tmp.length
325 if tmp isa FlatString then
326 tmp.items.copy_to(ns, tpl, tmp.index_from, off)
327 off += tpl
328 else
329 for j in tmp.substrings do
330 var slen = j.length
331 if j isa FlatString then
332 j.items.copy_to(ns, slen, j.index_from, off)
333 else if j isa Leaf then
334 j.buf.ns.copy_to(ns, slen, 0, off)
335 end
336 off += slen
337 end
338 end
339 i += 1
340 end
341 return ns.to_s_with_length(sl)
342 end
343 end