README: document nit_env.sh
[nit.git] / lib / buffered_ropes.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # Ropes with a special kind of Leaves that act similar to a `Buffer`
12 #
13 # When using this module, re-allocations are limited by the introduction
14 # of a larger-than-necessary buffered area for the native part of a `String`
15 # in an append-only fashion.
16 #
17 # Concretely, when concatenating two small strings of length `n` + `m` < `maxlen`
18 # What happens is that a `maxlen` byte buffer is allocated, ready to receive more
19 # bytes a posteriori without necessarily reallocating a new byte array.
20 #
21 # Theoretically, this should lower the number of concatenations
22 # and reallocations when concatenating `String` objects.
23 module buffered_ropes
24
25 intrude import core::text::ropes
26
27 # Hidden buffer, used to simulate a `FlatBuffer` on a short string.
28 #
29 # This is to be used by low-level APIs because of its lack of
30 # safety, if you use it, make sure you know what you are doing !
31 #
32 # Practically, it is the underlying representation of a `Leaf` in
33 # the `Rope` block, its advantage is that it saves a bit more space
34 # for future concatenations, without risking to overwrite previously
35 # used space, making it suitable for Strings.
36 #
37 # Note for future use : Should there be parallel capacity in Nit at
38 # some point, this is NOT thread safe !
39 private class ManualBuffer
40 var ns: NativeString is noinit
41 # Current position in the `NativeString`
42 #
43 # It is used by the clients of `ManualBuffer` as a guard
44 # to detect if the concatenation in the `ManualBuffer`
45 # is safe or not.
46 #
47 # i.e. :
48 # Say we have two strings `x` and `y` referencing the
49 # same `ManualBuffer` `b`, `y` is the concatenation of
50 # `x` and another string.
51 #
52 # If we try to concatenate a `String` `z` to `x`, a new
53 # `ManualBuffer` will be created since `pos` and `x.length`
54 # do not match.
55 #
56 # However, if we concatenate the same `String` to `y`,
57 # the contents of `z` will be copied to the `ManualBuffer`.
58 var pos = 0
59
60 init do ns = new NativeString(maxlen)
61
62 fun [](i: Int): Byte do return ns[i]
63 end
64
65 # Simple implementation of the iterator on Substrings for `Leaf`
66 #
67 # Basically just returns `self` encapsulated in a `FlatString`.
68 private class LeafSubstrings
69 super IndexedIterator[FlatText]
70
71 var leaf: Leaf
72 var str: FlatString is noinit
73 var avail = true
74
75 init do
76 str = new FlatString.with_infos(leaf.buf.ns, leaf.length, 0, leaf.length - 1)
77 end
78
79 redef fun is_ok do return avail
80
81 redef fun next do avail = false
82
83 redef fun index do return 0
84
85 redef fun item do return str
86 end
87
88 # Leaf of a `Rope`, used as a buffered area for speedy concatenation.
89 private class Leaf
90 super String
91 super Rope
92
93 var buf: ManualBuffer
94 var bns: NativeString is noinit
95 redef var length is noinit
96
97 # Unsafe, but since it is an experiment, don't mind
98 redef fun bytelen do return length
99
100 redef fun empty do return new Leaf(new ManualBuffer)
101
102 redef fun to_cstring do
103 var len = length
104 var ns = new NativeString(len + 1)
105 ns[len] = 0u8
106 buf.ns.copy_to(ns, len, 0, 0)
107 return ns
108 end
109
110 redef fun substrings do return new LeafSubstrings(self)
111
112 redef fun [](i) do return buf[i].to_i.code_point
113
114 init do
115 bns = buf.ns
116 length = buf.pos
117 end
118
119 redef fun output do new FlatString.with_infos(buf.ns, length, 0, length - 1).output
120
121 redef fun to_upper do
122 var x = new FlatBuffer
123 for i in chars do x.add(i.to_upper)
124 return x.to_s
125 end
126
127 redef fun to_lower do
128 var x = new FlatBuffer
129 for i in chars do x.add(i.to_lower)
130 return x.to_s
131 end
132
133 redef fun reversed do
134 var x = new ManualBuffer
135 var nns = x.ns
136 var ns = bns
137 var mlen = length
138 var j = mlen - 1
139 for i in [0 .. mlen[ do
140 nns[j] = ns[i]
141 j -= 1
142 end
143 x.pos = mlen - 1
144 return new Leaf(x)
145 end
146
147 redef fun substring(from, len) do
148 return new FlatString.with_infos(buf.ns, len, from, from + len - 1)
149 end
150
151 redef fun insert_at(s, pos) do
152 var l = substring(0, pos)
153 var r = substring_from(pos)
154 return l + s + r
155 end
156
157 redef fun +(o) do
158 var s = o.to_s
159 var slen = s.bytelen
160 var mlen = bytelen
161 if slen == 0 then return self
162 if mlen == 0 then return s
163 var nlen = mlen + slen
164 if nlen > maxlen then return new Concat(self, s)
165 if s isa FlatString then
166 var bpos = buf.pos
167 var sits = s.items
168 if bpos == mlen then
169 sits.copy_to(buf.ns, slen, s.first_byte, bpos)
170 buf.pos = bpos + slen
171 return new Leaf(buf)
172 else
173 var b = new ManualBuffer
174 var nbns = b.ns
175 bns.copy_to(nbns, mlen, 0, 0)
176 sits.copy_to(nbns, slen, s.first_byte, mlen)
177 b.pos = nlen
178 return new Leaf(b)
179 end
180 else if s isa Leaf then
181 var bpos = buf.pos
182 var sbns = s.bns
183 if bpos == mlen then
184 sbns.copy_to(bns, slen, 0, bpos)
185 buf.pos += slen
186 return new Leaf(buf)
187 else
188 var b = new ManualBuffer
189 var nbns = b.ns
190 bns.copy_to(nbns, mlen, 0, 0)
191 sbns.copy_to(nbns, slen, 0, mlen)
192 b.pos = nlen
193 return new Leaf(b)
194 end
195 else if s isa Concat then
196 if not s.left isa Concat then
197 return new Concat(self + s.left, s.right)
198 end
199 return new Concat(self, s)
200 else
201 var bpos = buf.pos
202 var b = buf
203 if bpos != mlen then
204 b = new ManualBuffer
205 bns.copy_to(b.ns, mlen, 0, 0)
206 end
207 for i in s.bytes do
208 bns[bpos] = i
209 bpos += 1
210 end
211 return new Leaf(b)
212 end
213 end
214 end
215
216 redef class Concat
217 redef fun to_cstring do
218 var len = length
219 var ns = new NativeString(len + 1)
220 ns[len] = 0u8
221 var off = 0
222 for i in substrings do
223 var ilen = i.length
224 if i isa FlatString then
225 i.items.copy_to(ns, ilen, i.first_byte, off)
226 else if i isa Leaf then
227 i.buf.ns.copy_to(ns, ilen, 0, off)
228 else
229 abort
230 end
231 off += ilen
232 end
233 return ns
234 end
235
236 redef fun +(o) do
237 var s = o.to_s
238 var slen = s.length
239 if s isa FlatString then
240 var r = right
241 var rlen = r.length
242 if rlen + slen > maxlen then return new Concat(left, new Concat(r, s))
243 return new Concat(left, r + s)
244 else if s isa Concat then
245 return new Concat(self, s)
246 else if s isa Leaf then
247 var r = right
248 var rlen = r.length
249 if rlen + slen > maxlen then return new Concat(left, new Concat(r, s))
250 return new Concat(left, r + s)
251 else
252 abort
253 end
254 end
255 end
256
257 redef class FlatString
258 redef fun +(o) do
259 var s = o.to_s
260 var slen = s.length
261 var mlen = length
262 if slen == 0 then return self
263 if mlen == 0 then return s
264 if s isa FlatString then
265 if slen + mlen > maxlen then return new Concat(self, s)
266 var mits = items
267 var sifrom = s.first_byte
268 var mifrom = first_byte
269 var sits = s.items
270 var b = new ManualBuffer
271 var bns = b.ns
272 mits.copy_to(bns, mlen, mifrom, 0)
273 sits.copy_to(bns, slen, sifrom, mlen)
274 b.pos = mlen + slen
275 return new Leaf(b)
276 else if s isa Concat then
277 var sl = s.left
278 var sllen = sl.length
279 if sllen + mlen > maxlen then return new Concat(self, s)
280 return new Concat(sl + self, s.right)
281 else if s isa Leaf then
282 if slen + mlen > maxlen then return new Concat(self, s)
283 var mifrom = first_byte
284 var sb = s.buf
285 var b = new ManualBuffer
286 var bns = b.ns
287 items.copy_to(bns, mlen, mifrom, 0)
288 sb.ns.copy_to(bns, slen, 0, mlen)
289 b.pos = mlen + slen
290 return new Leaf(b)
291 else
292 abort
293 end
294 end
295 end
296
297 redef class Array[E]
298
299 # Fast implementation
300 redef fun to_s do
301 var l = length
302 if l == 0 then return ""
303 if l == 1 then if self[0] == null then return "" else return self[0].to_s
304 var its = _items
305 var na = new NativeArray[String](l)
306 var i = 0
307 var sl = 0
308 var mypos = 0
309 while i < l do
310 var itsi = its[i]
311 if itsi == null then
312 i += 1
313 continue
314 end
315 var tmp = itsi.to_s
316 sl += tmp.length
317 na[mypos] = tmp
318 i += 1
319 mypos += 1
320 end
321 var ns = new NativeString(sl + 1)
322 ns[sl] = 0u8
323 i = 0
324 var off = 0
325 while i < mypos do
326 var tmp = na[i]
327 var tpl = tmp.length
328 if tmp isa FlatString then
329 tmp.items.copy_to(ns, tpl, tmp.first_byte, off)
330 off += tpl
331 else
332 for j in tmp.substrings do
333 var slen = j.length
334 if j isa FlatString then
335 j.items.copy_to(ns, slen, j.first_byte, off)
336 else if j isa Leaf then
337 j.buf.ns.copy_to(ns, slen, 0, off)
338 end
339 off += slen
340 end
341 end
342 i += 1
343 end
344 return ns.to_s_with_length(sl)
345 end
346 end