README: Update libgc's URL
[nit.git] / lib / text_stat.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # Injects stat-calculating functionalities to Text and its variants
12 #
13 # Every allocation is counted for each available type of Text in Core
14 #
15 # Cached operations are monitored and statistics of their use are printed
16 # at the end of the execution of a program
17 module text_stat
18
19 intrude import core::text::ropes
20 import counter
21
# Statistics gathered on the `Text` classes are stored in `Sys`
# and reported by `show_string_stats`.
redef class Sys

	# Counts the number of allocations of UnicodeFlatString
	var uniflatstr_allocations = 0

	# Counts the number of allocations of ASCIIFlatString
	var asciiflatstr_allocations = 0

	# Counts the number of allocations of FlatBuffer
	var flatbuf_allocations = 0

	# Counts the number of allocations of Concat
	var concat_allocations = 0

	# Counts the number of allocations of RopeBuffer
	var ropebuf_allocations = 0

	# Counts the number of calls to property `length`, by type of receiver
	var length_calls = new Counter[String]

	# Counts the number of `length` calls that missed the cache, by type of receiver
	var length_cache_miss = new Counter[String]

	# Counts the number of indexed accesses on a Text, by type of receiver
	var index_call = new Counter[String]

	# Counts the number of times that an indexed access
	# on a Concat caused a regeneration of the cache
	var concat_cache_miss = 0

	# Distance (in bytes) travelled when looking for a character in a FlatText
	var index_len = new Counter[Int]

	# Length (bytes) of the FlatString created by lib
	var str_bytelen = new Counter[Int]

	# Counter of the times `bytelen` is called on each type of receiver
	var bytelen_call = new Counter[String]

	# Counter of the times `position` is read or written on each type of receiver
	var position_call = new Counter[String]

	# Counter of the times `bytepos` is read or written on each type of receiver
	var bytepos_call = new Counter[String]

	# Accesses (reads and writes) to the `first_byte` property of a FlatString
	var first_byte_call = 0

	# Calls to the `last_byte` property of a FlatString
	var last_byte_call = 0

	# Number of strings created through the `full_data` constructor
	var str_full_created = 0

	# Prints every statistic collected so far on the standard output
	private fun show_string_stats do
		print """
Usage of Strings:

Allocations, by type:
"""
		print "\t-UnicodeFlatString = {uniflatstr_allocations}"
		print "\t-ASCIIFlatString = {asciiflatstr_allocations}"
		print "\t-FlatBuffer = {flatbuf_allocations}"
		print "\t-Concat = {concat_allocations}"
		print "\t-RopeBuffer = {ropebuf_allocations}"
		print ""
		print "Calls to length, by type:"
		for k, v in length_calls do
			printn "\t{k} = {v}"
			# Cache misses are recorded under the "FlatString" key (see
			# `FlatString::length`), so that is the key to report them for.
			if k == "FlatString" then printn " (cache misses {length_cache_miss[k]}, {div(length_cache_miss[k] * 100, v)}%)"
			printn "\n"
		end
		print "Indexed accesses, by type:"
		for k, v in index_call do
			printn "\t{k} = {v}"
			if k == "Concat" then printn " (cache misses {concat_cache_miss}, {div(concat_cache_miss * 100, v)}%)"
			printn "\n"
		end

		print "Calls to bytelen for each type:"
		for k, v in bytelen_call do
			print "\t{k} = {v}"
		end

		print "Calls to position for each type:"
		for k, v in position_call do
			print "\t{k} = {v}"
		end

		print "Calls to bytepos for each type:"
		for k, v in bytepos_call do
			print "\t{k} = {v}"
		end

		print "Calls to first_byte on FlatString {first_byte_call}"
		print "Calls to last_byte on FlatString {last_byte_call}"

		print "Length of travel for index distribution:"
		index_len.print_content

		print "Byte length of the FlatStrings created:"
		str_bytelen.print_content
	end

	# Runs the program, then reports the statistics once it has returned
	redef fun run do
		super
		show_string_stats
	end
end
131
# Reports the string statistics before handing over to the original `exit`,
# so that the report is printed on this path too.
redef fun exit(i)
do
	show_string_stats
	super
end
136
# Instrumented `Concat`: allocations, `bytelen` calls and indexed accesses
# are reported to the counters stored in `sys`.
redef class Concat
	# Records one more allocation of `Concat`
	init
	do
		sys.concat_allocations += 1
	end

	# Counts the call, then answers with the original `bytelen`
	redef fun bytelen
	do
		sys.bytelen_call.inc("Concat")
		return super
	end

	# Indexed access with accounting of the flat-leaf cache
	#
	# The access is counted in `index_call`. When `i` falls inside the leaf
	# kept in `flat_cache`, the character is served from it; otherwise a
	# miss is counted in `concat_cache_miss`, the tree is descended down to
	# the `FlatString` holding `i`, and that leaf becomes the new cache.
	redef fun [](i)
	do
		sys.index_call.inc("Concat")
		var cached_start = flat_last_pos_start
		if cached_start != -1 then
			var offset = i - cached_start
			if offset >= 0 and offset < flat_cache.length then return flat_cache[offset]
		end

		sys.concat_cache_miss += 1
		var requested = i
		var node: String = self
		loop
			if node isa FlatString then break
			node = node.as(Concat)
			var lhs = node.left
			var lhs_len = lhs.length
			if i >= lhs_len then
				# The target lies in the right subtree: make `i` relative to it
				node = node.right
				i -= lhs_len
			else
				node = lhs
			end
		end
		# `i` is now relative to the leaf, so the difference is the leaf's start
		flat_last_pos_start = requested - i
		flat_cache = node
		return node[i]
	end
end
173
# Instrumented `FlatText`: records how far each character lookup travels.
redef class FlatText
	# Byte index of the character at `index`
	#
	# The search starts from whichever known anchor is closest to `index`:
	# the beginning of the text, the last position looked up
	# (`position`/`bytepos`) or the end of the text. The absolute difference
	# between the starting byte and the resulting byte is recorded in
	# `sys.index_len`, and `position`/`bytepos` are updated to the result.
	#
	# `index` must be in `[0, length[`.
	redef fun char_to_byte_index(index)
	do
		var char_count = length
		assert index >= 0
		assert index < char_count

		# Distance, in characters, from each candidate anchor
		var from_start = index
		var from_end = (char_count - 1) - index
		var from_cache = (position - index).abs
		var buf = items

		var best = from_start
		if from_cache < best then best = from_cache
		if from_end < best then best = from_end

		# Ties are resolved in favour of the start, then the cache, then the end
		var byte_anchor: Int
		var char_anchor: Int
		if best == from_start then
			byte_anchor = first_byte
			char_anchor = 0
		else if best == from_cache then
			byte_anchor = bytepos
			char_anchor = position
		else
			byte_anchor = buf.find_beginning_of_char_at(last_byte)
			char_anchor = length - 1
		end

		var target = buf.char_to_byte_index_cached(index, char_anchor, byte_anchor)

		sys.index_len.inc((target - byte_anchor).abs)

		position = index
		bytepos = target

		return target
	end
end
218
# Instrumented `RopeBuffer`: allocations, `bytelen` calls and indexed
# accesses are reported to the counters stored in `sys`.
redef class RopeBuffer
	# Records one more allocation of `RopeBuffer`
	init
	do
		sys.ropebuf_allocations += 1
	end

	# Counts the call, then answers with the original `bytelen`
	redef fun bytelen
	do
		sys.bytelen_call.inc("RopeBuffer")
		return super
	end

	# Counts the indexed access, then answers with the original `[]`
	redef fun [](i)
	do
		sys.index_call.inc("RopeBuffer")
		return super
	end
end
234
# Instrumented `FlatBuffer`: every monitored property first bumps its
# counter in `sys` under the "FlatBuffer" key, then defers to `super`.
redef class FlatBuffer

	# Records one more allocation of `FlatBuffer`
	init
	do
		sys.flatbuf_allocations += 1
	end

	# Counted read of `bytepos`
	redef fun bytepos
	do
		sys.bytepos_call.inc("FlatBuffer")
		return super
	end

	# Counted write of `bytepos`
	redef fun bytepos=(p)
	do
		sys.bytepos_call.inc("FlatBuffer")
		super
	end

	# Counted read of `position`
	redef fun position
	do
		sys.position_call.inc("FlatBuffer")
		return super
	end

	# Counted write of `position`
	redef fun position=(p)
	do
		sys.position_call.inc("FlatBuffer")
		super
	end

	# Counted call to `bytelen`
	redef fun bytelen
	do
		sys.bytelen_call.inc("FlatBuffer")
		return super
	end

	# Counted call to `length`
	redef fun length
	do
		sys.length_calls.inc("FlatBuffer")
		return super
	end

	# Counted indexed access
	redef fun [](i)
	do
		sys.index_call.inc("FlatBuffer")
		return super
	end

	# Counted character-to-byte lookup (recorded in `index_call` as well)
	redef fun char_to_byte_index(i)
	do
		sys.index_call.inc("FlatBuffer")
		return super
	end
end
281
# Instrumented `FlatString`: monitored properties bump their counter in
# `sys` before deferring to `super`; `length` is recomputed here with a
# local cache so that cache misses can be counted.
redef class FlatString

	# Counted read of `bytepos`
	redef fun bytepos
	do
		sys.bytepos_call.inc("FlatString")
		return super
	end

	# Counted write of `bytepos`
	redef fun bytepos=(p)
	do
		sys.bytepos_call.inc("FlatString")
		super
	end

	# Counted read of `position`
	redef fun position
	do
		sys.position_call.inc("FlatString")
		return super
	end

	# Counted write of `position`
	redef fun position=(p)
	do
		sys.position_call.inc("FlatString")
		super
	end

	# Counted call to `bytelen`
	redef fun bytelen
	do
		sys.bytelen_call.inc("FlatString")
		return super
	end

	# Counted read of `first_byte`
	redef fun first_byte
	do
		sys.first_byte_call += 1
		return super
	end

	# Counted write of `first_byte` (shares the counter of the reads)
	redef fun first_byte=(v)
	do
		sys.first_byte_call += 1
		super
	end

	# Counted read of `last_byte`
	redef fun last_byte
	do
		sys.last_byte_call += 1
		return super
	end

	# Length in characters once computed by `length`, `null` until then
	private var length_cache: nullable Int = null

	# Number of characters, counted by walking the bytes on a cache miss
	#
	# Every call is recorded in `sys.length_calls`; calls that find
	# `length_cache` unset are also recorded in `sys.length_cache_miss`.
	# An empty string (`bytelen == 0`) answers 0 without filling the cache.
	redef fun length
	do
		sys.length_calls.inc("FlatString")
		var cached = length_cache
		if cached != null then return cached

		sys.length_cache_miss.inc("FlatString")
		if bytelen == 0 then return 0

		var cursor = first_byte
		var bytes = items
		var count = 0
		var stop = last_byte
		while cursor <= stop do
			# Jump over the whole character starting at `cursor`
			cursor += bytes.length_of_char_at(cursor)
			count += 1
		end
		length_cache = count
		return count
	end

	# Counted character-to-byte lookup (recorded in `index_call`)
	redef fun char_to_byte_index(i)
	do
		sys.index_call.inc("FlatString")
		return super
	end
end
349
# Instrumented `ASCIIFlatString`: constructions through `full_data` are counted.
redef class ASCIIFlatString
	# Builds the string as usual, then records the allocation both in
	# `asciiflatstr_allocations` and in `str_full_created`
	redef init full_data(items, bytelen, from, length) do
		super
		sys.str_full_created += 1
		sys.asciiflatstr_allocations += 1
	end
end
358
# Instrumented `UnicodeFlatString`: constructions through `full_data` are counted.
redef class UnicodeFlatString
	# Builds the string as usual, then records the allocation both in
	# `uniflatstr_allocations` and in `str_full_created`
	redef init full_data(items, bytelen, from, length) do
		super
		sys.str_full_created += 1
		sys.uniflatstr_allocations += 1
	end
end