examples: annotate examples
[nit.git] / lib / text_stat.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # Injects stat-calculating functionalities to Text and its variants
12 #
13 # Every allocation is counted for each available type of Text in Core
14 #
15 # Cached operations are monitored and statistics of their use are printed
16 # at the end of the execution of a program
17 module text_stat
18
19 intrude import core::text::ropes
20 import counter
21
redef class Sys

	# Counts the number of allocations of UnicodeFlatString
	var uniflatstr_allocations = 0

	# Counts the number of allocations of ASCIIFlatString
	var asciiflatstr_allocations = 0

	# Counts the number of allocations of FlatBuffer
	var flatbuf_allocations = 0

	# Counts the number of allocations of Concat
	var concat_allocations = 0

	# Counts the number of calls to property length, by type of receiver
	var length_calls = new Counter[String]

	# Counts the number of length calls that missed the cache, by type of receiver
	var length_cache_miss = new Counter[String]

	# Counts the number of calls to index on a Text, by type of receiver
	var index_call = new Counter[String]

	# Counts the number of times that an indexed access
	# on a Concat caused a regeneration of the cache
	var concat_cache_miss = 0

	# Distance between characters when looking for a character in a FlatString
	var index_len = new Counter[Int]

	# Length (bytes) of the FlatString created by lib
	var str_byte_length = new Counter[Int]

	# Counter of the times `byte_length` is called on each type of receiver
	var byte_length_call = new Counter[String]

	# Counter of the times `position` is called on each type of receiver
	var position_call = new Counter[String]

	# Counter of the times `bytepos` is called on each type of receiver
	var bytepos_call = new Counter[String]

	# Calls to the `first_byte` property of a FlatString
	var first_byte_call = 0

	# Calls to the `last_byte` property of a FlatString
	var last_byte_call = 0

	# Number of strings created through the `full_data` constructor
	var str_full_created = 0

	# Prints every statistic gathered in the counters above on the standard output
	#
	# The report lists, in order: allocations by type, calls to `length`,
	# indexed accesses, calls to `byte_length`, `position` and `bytepos`,
	# calls to `first_byte` and `last_byte`, then the content of the
	# `index_len` and `str_byte_length` counters.
	private fun show_string_stats do
		print """
Usage of Strings:

Allocations, by type:
"""
		print "\t-UnicodeFlatString = {uniflatstr_allocations}"
		print "\t-ASCIIFlatString = {asciiflatstr_allocations}"
		print "\t-FlatBuffer = {flatbuf_allocations}"
		print "\t-Concat = {concat_allocations}"
		print ""
		print "Calls to length, by type:"
		for k, v in length_calls do
			printn "\t{k} = {v}"
			# Cache misses are recorded under the "FlatString" key (see
			# `FlatString::length`), so this is the key that must be tested;
			# testing "UnicodeFlatString" never matched any recorded key.
			if k == "FlatString" then printn " (cache misses {length_cache_miss[k]}, {div(length_cache_miss[k] * 100, v)}%)"
			printn "\n"
		end
		print "Indexed accesses, by type:"
		for k, v in index_call do
			printn "\t{k} = {v}"
			# Only Concat keeps a cache for indexed accesses
			if k == "Concat" then printn " (cache misses {concat_cache_miss}, {div(concat_cache_miss * 100, v)}%)"
			printn "\n"
		end

		print "Calls to byte_length for each type:"
		for k, v in byte_length_call do
			print "\t{k} = {v}"
		end

		print "Calls to position for each type:"
		for k, v in position_call do
			print "\t{k} = {v}"
		end

		print "Calls to bytepos for each type:"
		for k, v in bytepos_call do
			print "\t{k} = {v}"
		end

		print "Calls to first_byte on FlatString {first_byte_call}"
		print "Calls to last_byte on FlatString {last_byte_call}"

		print "Length of travel for index distribution:"
		index_len.print_content

		print "Byte length of the FlatStrings created:"
		str_byte_length.print_content
	end

	# Runs the program as usual, then prints the string statistics
	redef fun run do
		super
		show_string_stats
	end
end
127
# Prints the string statistics, then delegates to the inherited `exit`
#
# The report is emitted before `super`; presumably the inherited
# implementation ends the process, so nothing placed after it would run.
redef fun exit(i)
do
	show_string_stats
	super
end
132
redef class Concat
	# Records the allocation of a new Concat
	init do
		sys.concat_allocations += 1
	end

	# Records the call, then answers with the inherited `byte_length`
	redef fun byte_length
	do
		sys.byte_length_call.inc "Concat"
		return super
	end

	# Instrumented indexed access
	#
	# Every call is recorded in `sys.index_call`; a call that cannot be
	# served from `flat_cache` is also recorded in `sys.concat_cache_miss`.
	redef fun [](i)
	do
		sys.index_call.inc "Concat"

		# Served by the cached leaf when `i` falls inside it
		if flat_last_pos_start != -1 then
			var offset = i - flat_last_pos_start
			if offset >= 0 and offset < flat_cache.length then return flat_cache[offset]
		end

		# Miss: walk down the tree until a FlatString is reached,
		# making `i` relative to the current node on the way
		sys.concat_cache_miss += 1
		var node: String = self
		var requested = i
		loop
			if node isa FlatString then break
			node = node.as(Concat)
			var left_part = node.left
			var left_len = left_part.length
			if i < left_len then
				node = node.left
			else
				node = node.right
				i -= left_len
			end
		end

		# Remember the leaf and the index at which it starts
		flat_last_pos_start = requested - i
		flat_cache = node
		return node[i]
	end
end
169
redef class FlatText
	# Byte index of the character at `index`, with the travelled distance recorded
	#
	# The search starts from whichever known point is the closest to `index`:
	# the first character, the last character, or the cached
	# (`position`, `bytepos`) pair. The absolute difference between the
	# starting byte and the resulting byte is added to `sys.index_len`,
	# then the cache is moved to the character that was just located.
	redef fun char_to_byte_index(index)
	do
		var char_count = length
		assert index >= 0
		assert index < char_count

		# Distance, in characters, from each candidate starting point
		var from_start = index
		var from_end = (char_count - 1) - index
		var from_cache = (position - index).abs
		var bytes = items

		var closest = from_start
		if from_cache < closest then closest = from_cache
		if from_end < closest then closest = from_end

		# Starting point, as a byte index and as a character index
		var start_byte: Int
		var start_char: Int

		if closest == from_start then
			start_byte = first_byte
			start_char = 0
		else if closest == from_cache then
			start_byte = bytepos
			start_char = position
		else
			start_byte = bytes.find_beginning_of_char_at(last_byte)
			start_char = length - 1
		end

		var found = bytes.char_to_byte_index_cached(index, start_char, start_byte)

		sys.index_len.inc((found - start_byte).abs)

		position = index
		bytepos = found

		return found
	end
end
214
redef class FlatBuffer

	# Records the allocation of a new FlatBuffer
	init do
		sys.flatbuf_allocations += 1
	end

	# Records a read of `bytepos`
	redef fun bytepos
	do
		sys.bytepos_call.inc "FlatBuffer"
		return super
	end

	# Records a write of `bytepos`, under the same key as the reads
	redef fun bytepos=(p)
	do
		sys.bytepos_call.inc "FlatBuffer"
		super
	end

	# Records a read of `position`
	redef fun position
	do
		sys.position_call.inc "FlatBuffer"
		return super
	end

	# Records a write of `position`, under the same key as the reads
	redef fun position=(p)
	do
		sys.position_call.inc "FlatBuffer"
		super
	end

	# Records a call to `byte_length`
	redef fun byte_length
	do
		sys.byte_length_call.inc "FlatBuffer"
		return super
	end

	# Records a call to `length`
	redef fun length
	do
		sys.length_calls.inc "FlatBuffer"
		return super
	end

	# Records an indexed access
	redef fun [](i)
	do
		sys.index_call.inc "FlatBuffer"
		return super
	end

	# Records a char-to-byte index conversion as an indexed access
	#
	# NOTE(review): if the inherited `[]` goes through `char_to_byte_index`,
	# a single indexed access is counted twice; to be confirmed against core.
	redef fun char_to_byte_index(i)
	do
		sys.index_call.inc "FlatBuffer"
		return super
	end
end
261
redef class FlatString

	# Records a read of `bytepos`
	redef fun bytepos
	do
		sys.bytepos_call.inc "FlatString"
		return super
	end

	# Records a write of `bytepos`, under the same key as the reads
	redef fun bytepos=(p)
	do
		sys.bytepos_call.inc "FlatString"
		super
	end

	# Records a read of `position`
	redef fun position
	do
		sys.position_call.inc "FlatString"
		return super
	end

	# Records a write of `position`, under the same key as the reads
	redef fun position=(p)
	do
		sys.position_call.inc "FlatString"
		super
	end

	# Records a call to `byte_length`
	redef fun byte_length
	do
		sys.byte_length_call.inc "FlatString"
		return super
	end

	# Records a read of `first_byte`
	redef fun first_byte
	do
		sys.first_byte_call += 1
		return super
	end

	# Records a write of `first_byte`, in the same counter as the reads
	redef fun first_byte=(v)
	do
		sys.first_byte_call += 1
		super
	end

	# Records a read of `last_byte`
	redef fun last_byte
	do
		sys.last_byte_call += 1
		return super
	end

	# Length in characters, once it has been computed; `null` until then
	private var length_cache: nullable Int = null

	# Length in characters, computed on the first call then served from `length_cache`
	#
	# Every call is recorded in `sys.length_calls`; a call that finds no
	# cached value is also recorded in `sys.length_cache_miss`.
	redef fun length
	do
		sys.length_calls.inc "FlatString"
		var cached = length_cache
		if cached != null then return cached

		sys.length_cache_miss.inc "FlatString"
		# An empty string answers 0 without filling the cache
		if byte_length == 0 then return 0

		# Jump from character to character, counting each of them
		var cursor = first_byte
		var bytes = items
		var chars = 0
		var stop = last_byte
		loop
			if cursor > stop then break
			cursor += bytes.length_of_char_at(cursor)
			chars += 1
		end

		length_cache = chars
		return chars
	end

	# Records a char-to-byte index conversion as an indexed access
	redef fun char_to_byte_index(i)
	do
		sys.index_call.inc "FlatString"
		return super
	end
end
329
redef class ASCIIFlatString
	# Builds the string as usual, then records the allocation
	#
	# Both the ASCIIFlatString allocation counter and the counter of
	# strings created through `full_data` are incremented.
	redef init full_data(items, byte_length, from, length) do
		super
		sys.str_full_created += 1
		sys.asciiflatstr_allocations += 1
	end
end
338
redef class UnicodeFlatString
	# Builds the string as usual, then records the allocation
	#
	# Both the UnicodeFlatString allocation counter and the counter of
	# strings created through `full_data` are incremented.
	redef init full_data(items, byte_length, from, length) do
		super
		sys.str_full_created += 1
		sys.uniflatstr_allocations += 1
	end
end