README: document nit_env.sh
[nit.git] / lib / text_stat.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # Injects stat-calculating functionalities to Text and its variants
12 #
13 # Every allocation is counted for each available type of Text in Core
14 #
15 # Cached operations are monitored and statistics of their use are printed
16 # at the end of the execution of a program
17 module text_stat
18
19 intrude import core::text::ropes
20 import counter
21
redef class Sys

	# Counts the number of allocations of FlatString
	var flatstr_allocations = 0

	# Counts the number of allocations of FlatBuffer
	var flatbuf_allocations = 0

	# Counts the number of allocations of Concat
	var concat_allocations = 0

	# Counts the number of allocations of RopeBuffer
	var ropebuf_allocations = 0

	# Counts the number of calls to property `length`, by type of receiver
	var length_calls = new Counter[String]

	# Counts the number of `length` calls that missed the cache, by type of receiver
	var length_cache_miss = new Counter[String]

	# Counts the number of indexed accesses on a Text, by type of receiver
	var index_call = new Counter[String]

	# Counts the number of times that an indexed access
	# on a Concat caused a regeneration of the cache
	var concat_cache_miss = 0

	# Distribution of the byte distances between the starting point chosen by
	# `FlatText::char_to_byte_index` and the byte index it finally returns
	var index_len = new Counter[Int]

	# Distribution of the byte lengths of the FlatString created
	# through `with_infos` and `full`
	var str_bytelen = new Counter[Int]

	# Counter of the times `bytelen` is called, by type of receiver
	var bytelen_call = new Counter[String]

	# Counter of the times `position` is read or written, by type of receiver
	var position_call = new Counter[String]

	# Counter of the times `bytepos` is read or written, by type of receiver
	var bytepos_call = new Counter[String]

	# Accesses (reads and writes) to the `first_byte` property of a FlatString
	var first_byte_call = 0

	# Accesses (reads and writes) to the `last_byte` property of a FlatString
	var last_byte_call = 0

	# Number of FlatString created through `full`, i.e. with a known length
	var str_full_created = 0

	# Prints every statistic collected so far on the standard output
	private fun show_string_stats do
		print """
Usage of Strings:

Allocations, by type:
"""
		print "\t-FlatString = {flatstr_allocations}"
		print "\t-FlatBuffer = {flatbuf_allocations}"
		print "\t-Concat = {concat_allocations}"
		print "\t-RopeBuffer = {ropebuf_allocations}"
		print ""
		print "Calls to length, by type:"
		for k, v in length_calls do
			printn "\t{k} = {v}"
			if k == "FlatString" then printn " (cache misses {length_cache_miss[k]}, {div(length_cache_miss[k] * 100, v)}%)"
			printn "\n"
		end
		print "Indexed accesses, by type:"
		for k, v in index_call do
			printn "\t{k} = {v}"
			if k == "Concat" then printn " (cache misses {concat_cache_miss}, {div(concat_cache_miss * 100, v)}%)"
			printn "\n"
		end

		print "Calls to bytelen for each type:"
		for k, v in bytelen_call do
			print "\t{k} = {v}"
		end

		print "Calls to position for each type:"
		for k, v in position_call do
			print "\t{k} = {v}"
		end

		print "Calls to bytepos for each type:"
		for k, v in bytepos_call do
			print "\t{k} = {v}"
		end

		print "Calls to first_byte on FlatString {first_byte_call}"
		print "Calls to last_byte on FlatString {last_byte_call}"

		# Guard the ratio: when no FlatString allocation was counted, the
		# float division would print NaN/infinity instead of a percentage.
		var full_percent = 0.0
		if flatstr_allocations != 0 then
			full_percent = str_full_created.to_f / flatstr_allocations.to_f * 100.0
		end
		print "FlatStrings allocated with length {str_full_created} ({full_percent}%)"

		print "Length of travel for index distribution:"
		index_len.print_content

		print "Byte length of the FlatStrings created:"
		str_bytelen.print_content
	end

	# Runs the program as usual, then prints the statistics once it is over
	redef fun run do
		super
		show_string_stats
	end
end
129
# Prints the string statistics before handing over to the regular `exit`,
# so that they are shown even when the program is stopped explicitly.
redef fun exit(i)
do
	show_string_stats
	super
end
134
redef class Concat
	# Records the allocation of a new Concat
	init
	do
		sys.concat_allocations += 1
	end

	# Records the call, then defers to the regular `bytelen`
	redef fun bytelen
	do
		sys.bytelen_call.inc("Concat")
		return super
	end

	# Instrumented indexed access.
	#
	# The access is served from `flat_cache` when `i` falls inside the leaf
	# reached by the previous lookup; otherwise a cache miss is recorded,
	# the tree is descended down to the FlatString leaf holding `i`, and
	# that leaf becomes the new cache.
	redef fun [](i)
	do
		sys.index_call.inc("Concat")

		# Fast path: is `i` inside the cached leaf?
		if flat_last_pos_start != -1 then
			var offset = i - flat_last_pos_start
			if offset >= 0 and offset < flat_cache.length then return flat_cache[offset]
		end

		sys.concat_cache_miss += 1

		# Slow path: walk down, keeping `remaining` relative to the current node
		var node: String = self
		var remaining = i
		loop
			if node isa FlatString then break
			node = node.as(Concat)
			var left_length = node.left.length
			if remaining >= left_length then
				node = node.right
				remaining -= left_length
			else
				node = node.left
			end
		end

		# `i - remaining` is the index, in `self`, of the first char of the leaf
		flat_last_pos_start = i - remaining
		flat_cache = node
		return node[remaining]
	end
end
171
redef class FlatText
	# Instrumented conversion of the char index `index` into a byte index.
	#
	# Among the beginning of the text, its end and the cached `position`,
	# the candidate closest to `index` is used as the starting point handed
	# to `items.char_to_byte_index_cached`. The absolute difference between
	# the starting byte and the resulting byte is recorded in `sys.index_len`,
	# and the cache (`position`/`bytepos`) is updated with the result.
	redef fun char_to_byte_index(index)
	do
		var char_count = length
		assert index >= 0
		assert index < char_count

		# Distance, in chars, between `index` and each candidate
		var from_start = index
		var from_end = (char_count - 1) - index
		var from_cache = (position - index).abs
		var raw = items

		# Smallest of the three distances; on ties the start wins over
		# the cache, which wins over the end (see the tests below).
		var closest = from_start
		if from_cache < closest then closest = from_cache
		if from_end < closest then closest = from_end

		var start_byte: Int
		var start_char: Int
		if closest == from_start then
			start_byte = first_byte
			start_char = 0
		else if closest == from_cache then
			start_byte = bytepos
			start_char = position
		else
			start_byte = raw.find_beginning_of_char_at(last_byte)
			start_char = length - 1
		end

		var found_byte = raw.char_to_byte_index_cached(index, start_char, start_byte)

		sys.index_len.inc((found_byte - start_byte).abs)

		position = index
		bytepos = found_byte

		return found_byte
	end
end
216
redef class RopeBuffer
	# Records the allocation of a new RopeBuffer
	init
	do
		sys.ropebuf_allocations += 1
	end

	# Records the call, then defers to the regular `bytelen`
	redef fun bytelen
	do
		sys.bytelen_call.inc("RopeBuffer")
		return super
	end

	# Records the indexed access, then defers to the regular `[]`
	redef fun [](i)
	do
		sys.index_call.inc("RopeBuffer")
		return super
	end
end
232
redef class FlatBuffer

	# Records the allocation of a new FlatBuffer
	init
	do
		sys.flatbuf_allocations += 1
	end

	# Records a read of `bytepos`
	redef fun bytepos
	do
		sys.bytepos_call.inc("FlatBuffer")
		return super
	end

	# Records a write of `bytepos`
	redef fun bytepos=(p)
	do
		sys.bytepos_call.inc("FlatBuffer")
		super
	end

	# Records a read of `position`
	redef fun position
	do
		sys.position_call.inc("FlatBuffer")
		return super
	end

	# Records a write of `position`
	redef fun position=(p)
	do
		sys.position_call.inc("FlatBuffer")
		super
	end

	# Records a call to `bytelen`
	redef fun bytelen
	do
		sys.bytelen_call.inc("FlatBuffer")
		return super
	end

	# Records a call to `length`
	redef fun length
	do
		sys.length_calls.inc("FlatBuffer")
		return super
	end

	# Records an indexed access
	redef fun [](i)
	do
		sys.index_call.inc("FlatBuffer")
		return super
	end

	# Records a char-to-byte index conversion as an indexed access
	redef fun char_to_byte_index(i)
	do
		sys.index_call.inc("FlatBuffer")
		return super
	end
end
279
redef class FlatString

	# Records a read of `bytepos`
	redef fun bytepos do
		sys.bytepos_call.inc "FlatString"
		return super
	end

	# Records a write of `bytepos`
	redef fun bytepos=(p) do
		sys.bytepos_call.inc "FlatString"
		super
	end

	# Records a read of `position`
	redef fun position do
		sys.position_call.inc "FlatString"
		return super
	end

	# Records a write of `position`
	redef fun position=(p) do
		sys.position_call.inc "FlatString"
		super
	end

	# Records a call to `bytelen`
	redef fun bytelen do
		sys.bytelen_call.inc "FlatString"
		return super
	end

	# Records a read of `first_byte`
	redef fun first_byte do
		sys.first_byte_call += 1
		return super
	end

	# Records a write of `first_byte`
	redef fun first_byte=(v) do
		sys.first_byte_call += 1
		super
	end

	# Records a read of `last_byte`
	redef fun last_byte do
		sys.last_byte_call += 1
		return super
	end

	# Records a write of `last_byte`
	redef fun last_byte=(v) do
		sys.last_byte_call += 1
		super
	end

	# Records the allocation of a new FlatString
	init do
		sys.flatstr_allocations += 1
	end

	# Builds a FlatString over `items` whose length in chars is not known yet.
	#
	# The byte length of the new string is recorded in `sys.str_bytelen`.
	redef init with_infos(items, bytelen, from, to)
	do
		self.items = items
		self.bytelen = bytelen
		sys.str_bytelen.inc bytelen
		first_byte = from
		last_byte = to
	end

	# Builds a FlatString over `items` whose length in chars is already known.
	#
	# The byte length of the new string is recorded in `sys.str_bytelen` and
	# the creation is counted in `sys.str_full_created`.
	redef init full(items, bytelen, from, to, length)
	do
		self.items = items
		self.length = length
		# The instrumented `length` getter below only consults `length_cache`.
		# Seed it here, otherwise the length supplied by the caller would be
		# ignored: the first call to `length` would be reported as a cache
		# miss and the chars would be counted again.
		length_cache = length
		self.bytelen = bytelen
		sys.str_bytelen.inc bytelen
		sys.str_full_created += 1
		first_byte = from
		last_byte = to
	end

	# Length in chars once known, `null` as long as it was neither
	# supplied at construction (`full`) nor computed by `length`
	private var length_cache: nullable Int = null

	# Instrumented length in chars.
	#
	# Every call is counted in `sys.length_calls`; a call that cannot be
	# answered from `length_cache` is also counted in `sys.length_cache_miss`.
	redef fun length do
		sys.length_calls.inc "FlatString"
		var cached = length_cache
		if cached != null then return cached
		sys.length_cache_miss.inc "FlatString"
		if bytelen == 0 then return 0

		# Count the chars by hopping from one char start to the next
		var cursor = first_byte
		var its = items
		var count = 0
		var stop = last_byte
		while cursor <= stop do
			cursor += its.length_of_char_at(cursor)
			count += 1
		end
		length_cache = count
		return count
	end

	# Records a char-to-byte index conversion as an indexed access
	redef fun char_to_byte_index(i) do
		sys.index_call.inc "FlatString"
		return super
	end
end