benchmarks/string: Added variants of lib to benchmarks
[nit.git] / benchmarks / strings / lib_variants / regular / linear_substring.patch
1 From 15401bfd27836dd712e0af2ef8fc1aa33af29f89 Mon Sep 17 00:00:00 2001
2 From: Lucas Bajolet <r4pass@hotmail.com>
3 Date: Fri, 21 Aug 2015 14:39:34 -0400
4 Subject: [PATCH] Patch: Removed constant-time substring in text
5
6 Signed-off-by: Lucas Bajolet <r4pass@hotmail.com>
7 ---
8  lib/standard/bytes.nit      |   7 ++-
9  lib/standard/file.nit       |   2 +-
10  lib/standard/stream.nit     |   8 +++-
11  lib/standard/text/flat.nit  | 102 +++++++++++++++++++-------------------------
12  lib/standard/text/ropes.nit |  39 +++++++++++------
13  lib/websocket/websocket.nit |   4 +-
14  6 files changed, 84 insertions(+), 78 deletions(-)
15
16 diff --git a/lib/standard/bytes.nit b/lib/standard/bytes.nit
17 index 59c4c5f..f22b325 100644
18 --- a/lib/standard/bytes.nit
19 +++ b/lib/standard/bytes.nit
20 @@ -150,7 +150,7 @@ class Bytes
21                         b = clean_utf8
22                         persisted = false
23                 end
24 -               return new FlatString.with_infos(b.items, b.length, 0, b.length -1)
25 +               return new FlatString.with_infos(b.items, b.length)
26         end
27  
28         redef fun iterator do return new BytesIterator.with_buffer(self)
29 @@ -256,7 +256,7 @@ redef class Text
30         # Appends `self.bytes` to `b`
31         fun append_to_bytes(b: Bytes) do
32                 for s in substrings do
33 -                       var from = if s isa FlatString then s.first_byte else 0
34 +                       var from = 0
35                         b.append_ns_from(s.items, s.bytelen, from)
36                 end
37         end
38 @@ -264,8 +264,7 @@ end
39  
40  redef class FlatText
41         redef fun append_to_bytes(b) do
42 -               var from = if self isa FlatString then first_byte else 0
43 -               b.append_ns_from(items, bytelen, from)
44 +               b.append_ns_from(items, bytelen, 0)
45         end
46  end
47  
48 diff --git a/lib/standard/file.nit b/lib/standard/file.nit
49 index aba34e8..f7df0d4 100644
50 --- a/lib/standard/file.nit
51 +++ b/lib/standard/file.nit
52 @@ -1263,7 +1263,7 @@ end
53  redef class FlatString
54         redef fun write_native_to(s)
55         do
56 -               s.write_native(items, first_byte, bytelen)
57 +               s.write_native(items, 0, bytelen)
58         end
59  end
60  
61 diff --git a/lib/standard/stream.nit b/lib/standard/stream.nit
62 index 2db319a..6e1b5d1 100644
63 --- a/lib/standard/stream.nit
64 +++ b/lib/standard/stream.nit
65 @@ -186,12 +186,16 @@ abstract class Reader
66                         # if this is the best size or not
67                         var chunksz = 129
68                         if chunksz > remsp then
69 -                               rets += new FlatString.with_infos(sits, remsp, pos, pos + remsp - 1)
70 +                               var nns = new NativeString(remsp)
71 +                               sits.copy_to(nns, remsp, pos, 0)
72 +                               rets += new FlatString.with_infos(nns, remsp)
73                                 break
74                         end
75                         var st = sits.find_beginning_of_char_at(pos + chunksz - 1)
76                         var bytelen = st - pos
77 -                       rets += new FlatString.with_infos(sits, bytelen, pos, st - 1)
78 +                       var nns = new NativeString(bytelen)
79 +                       sits.copy_to(nns, bytelen, pos, 0)
80 +                       rets += new FlatString.with_infos(nns, bytelen)
81                         pos = st
82                         remsp -= bytelen
83                 end
84 diff --git a/lib/standard/text/flat.nit b/lib/standard/text/flat.nit
85 index 917b0e5..c68f0c1 100644
86 --- a/lib/standard/text/flat.nit
87 +++ b/lib/standard/text/flat.nit
88 @@ -36,15 +36,13 @@ end
89  
90  redef class FlatText
91  
92 -       private fun first_byte: Int do return 0
93 -
94 -       private fun last_byte: Int do return bytelen - 1
95 +       redef fun [](index) do return items.char_at(char_to_byte_index(index))
96  
97 -       # Cache of the latest position (char) explored in the string
98 -       var position: Int = 0
99 +       # Cached position (in bytes) in the underlying NativeString
100 +       var bytepos = 0
101  
102 -       # Cached position (bytes) in the NativeString underlying the String
103 -       var bytepos: Int = first_byte is lateinit
104 +       # Cached position (in chars) corresponding to `bytepos`
105 +       var position = 0
106  
107         # Index of the character `index` in `items`
108         private fun char_to_byte_index(index: Int): Int do
109 @@ -66,13 +64,13 @@ redef class FlatText
110                 var my_i: Int
111  
112                 if min == delta_begin then
113 -                       ns_i = first_byte
114 +                       ns_i = 0
115                         my_i = 0
116                 else if min == delta_cache then
117                         ns_i = bytepos
118                         my_i = position
119                 else
120 -                       ns_i = its.find_beginning_of_char_at(last_byte)
121 +                       ns_i = its.find_beginning_of_char_at(bytelen - 1)
122                         my_i = length - 1
123                 end
124  
125 @@ -83,8 +81,6 @@ redef class FlatText
126  
127                 return ns_i
128         end
129 -
130 -       redef fun [](index) do return items.char_at(char_to_byte_index(index))
131  end
132  
133  # Immutable strings of characters.
134 @@ -92,22 +88,16 @@ class FlatString
135         super FlatText
136         super String
137  
138 -       # Index at which `self` begins in `items`, inclusively
139 -       redef var first_byte is noinit
140 -
141 -       # Index at which `self` ends in `items`, inclusively
142 -       redef var last_byte is noinit
143 -
144         redef var chars = new FlatStringCharView(self) is lazy
145  
146         redef var bytes = new FlatStringByteView(self) is lazy
147  
148         redef var length is lazy do
149                 if bytelen == 0 then return 0
150 -               var st = first_byte
151 +               var st = 0
152                 var its = items
153                 var ln = 0
154 -               var lst = last_byte
155 +               var lst = bytelen - 1
156                 while st <= lst do
157                         st += its.length_of_char_at(st)
158                         ln += 1
159 @@ -126,7 +116,7 @@ class FlatString
160                 return s
161         end
162  
163 -       redef fun fast_cstring do return items.fast_cstring(first_byte)
164 +       redef fun fast_cstring do return items.fast_cstring(0)
165  
166         redef fun substring(from, count)
167         do
168 @@ -146,8 +136,10 @@ class FlatString
169                 var byteto = char_to_byte_index(end_index)
170                 byteto += items.length_of_char_at(byteto) - 1
171  
172 -               var s = new FlatString.full(items, byteto - bytefrom + 1, bytefrom, byteto, count)
173 -               return s
174 +               var nslen = byteto - bytefrom + 1
175 +               var nns = new NativeString(nslen)
176 +               items.copy_to(nns, nslen, bytefrom, 0)
177 +               return new FlatString.full(nns, nslen, count)
178         end
179  
180         redef fun empty do return "".as(FlatString)
181 @@ -195,31 +187,27 @@ class FlatString
182         #
183         # `items` will be used as is, without copy, to retrieve the characters of the string.
184         # Aliasing issues is the responsibility of the caller.
185 -       private init with_infos(items: NativeString, bytelen, from, to: Int)
186 +       private init with_infos(items: NativeString, bytelen: Int)
187         do
188                 self.items = items
189                 self.bytelen = bytelen
190 -               first_byte = from
191 -               last_byte = to
192         end
193  
194         # Low-level creation of a new string with all the data.
195         #
196         # `items` will be used as is, without copy, to retrieve the characters of the string.
197         # Aliasing issues is the responsibility of the caller.
198 -       private init full(items: NativeString, bytelen, from, to, length: Int)
199 +       private init full(items: NativeString, bytelen, length: Int)
200         do
201                 self.items = items
202                 self.length = length
203                 self.bytelen = bytelen
204 -               first_byte = from
205 -               last_byte = to
206         end
207  
208         redef fun to_cstring do
209                 if real_items != null then return real_items.as(not null)
210                 var new_items = new NativeString(bytelen + 1)
211 -               self.items.copy_to(new_items, bytelen, first_byte, 0)
212 +               self.items.copy_to(new_items, bytelen, 0, 0)
213                 new_items[bytelen] = 0u8
214                 real_items = new_items
215                 return new_items
216 @@ -235,8 +223,8 @@ class FlatString
217  
218                 if other.bytelen != my_length then return false
219  
220 -               var my_index = first_byte
221 -               var its_index = other.first_byte
222 +               var my_index = 0
223 +               var its_index = 0
224  
225                 var last_iteration = my_index + my_length
226  
227 @@ -285,14 +273,14 @@ class FlatString
228                 var mlen = bytelen
229                 var nlen = mlen + slen
230                 var mits = items
231 -               var mifrom = first_byte
232 +               var mifrom = 0
233                 if s isa FlatText then
234                         var sits = s.items
235 -                       var sifrom = s.first_byte
236 +                       var sifrom = 0
237                         var ns = new NativeString(nlen + 1)
238                         mits.copy_to(ns, mlen, mifrom, 0)
239                         sits.copy_to(ns, slen, sifrom, mlen)
240 -                       return new FlatString.full(ns, nlen, 0, nlen - 1, length + o.length)
241 +                       return new FlatString.full(ns, nlen, length + o.length)
242                 else
243                         abort
244                 end
245 @@ -307,11 +295,11 @@ class FlatString
246                 ns[new_bytelen] = 0u8
247                 var offset = 0
248                 while i > 0 do
249 -                       items.copy_to(ns, bytelen, first_byte, offset)
250 +                       items.copy_to(ns, bytelen, 0, offset)
251                         offset += mybtlen
252                         i -= 1
253                 end
254 -               return new FlatString.full(ns, new_bytelen, 0, new_bytelen - 1, newlen)
255 +               return new FlatString.full(ns, new_bytelen, newlen)
256         end
257  
258  
259 @@ -320,11 +308,11 @@ class FlatString
260                 if hash_cache == null then
261                         # djb2 hash algorithm
262                         var h = 5381
263 -                       var i = first_byte
264 +                       var i = 0
265  
266                         var myitems = items
267  
268 -                       while i <= last_byte do
269 +                       while i <= bytelen - 1 do
270                                 h = (h << 5) + h + myitems[i].to_i
271                                 i += 1
272                         end
273 @@ -408,16 +396,16 @@ private class FlatStringByteReverseIterator
274  
275         init with_pos(tgt: FlatString, pos: Int)
276         do
277 -               init(tgt, tgt.items, pos + tgt.first_byte)
278 +               init(tgt, tgt.items, pos)
279         end
280  
281 -       redef fun is_ok do return curr_pos >= target.first_byte
282 +       redef fun is_ok do return curr_pos >= 0
283  
284         redef fun item do return target_items[curr_pos]
285  
286         redef fun next do curr_pos -= 1
287  
288 -       redef fun index do return curr_pos - target.first_byte
289 +       redef fun index do return curr_pos
290  
291  end
292  
293 @@ -432,16 +420,16 @@ private class FlatStringByteIterator
294  
295         init with_pos(tgt: FlatString, pos: Int)
296         do
297 -               init(tgt, tgt.items, pos + tgt.first_byte)
298 +               init(tgt, tgt.items, pos + 0)
299         end
300  
301 -       redef fun is_ok do return curr_pos <= target.last_byte
302 +       redef fun is_ok do return curr_pos <= target.bytelen - 1
303  
304         redef fun item do return target_items[curr_pos]
305  
306         redef fun next do curr_pos += 1
307  
308 -       redef fun index do return curr_pos - target.first_byte
309 +       redef fun index do return curr_pos
310  
311  end
312  
313 @@ -452,12 +440,12 @@ private class FlatStringByteView
314  
315         redef fun [](index)
316         do
317 -               # Check that the index (+ first_byte) is not larger than last_byte
318 +               # Check that the index is not larger than bytelen - 1
319                 # In other terms, if the index is valid
320                 assert index >= 0
321                 var target = self.target
322 -               assert (index + target.first_byte) <= target.last_byte
323 -               return target.items[index + target.first_byte]
324 +               assert index <= target.bytelen - 1
325 +               return target.items[index]
326         end
327  
328         redef fun iterator_from(start) do return new FlatStringByteIterator.with_pos(target, start)
329 @@ -589,7 +577,7 @@ class FlatBuffer
330         do
331                 written = true
332                 if bytelen == 0 then items = new NativeString(1)
333 -               return new FlatString.with_infos(items, bytelen, 0, bytelen - 1)
334 +               return new FlatString.with_infos(items, bytelen)
335         end
336  
337         redef fun to_cstring
338 @@ -653,7 +641,7 @@ class FlatBuffer
339                 var sl = s.bytelen
340                 enlarge(bytelen + sl)
341                 if s isa FlatText then
342 -                       s.items.copy_to(items, sl, s.first_byte, bytelen)
343 +                       s.items.copy_to(items, sl, 0, bytelen)
344                 else
345                         for i in s.substrings do append i
346                         return
347 @@ -700,7 +688,7 @@ class FlatBuffer
348  
349         redef fun times(repeats)
350         do
351 -               var x = new FlatString.with_infos(items, bytelen, 0, bytelen - 1)
352 +               var x = new FlatString.with_infos(items, bytelen)
353                 for i in [1 .. repeats[ do
354                         append(x)
355                 end
356 @@ -881,7 +869,7 @@ redef class NativeString
357         redef fun to_s_with_length(length): FlatString
358         do
359                 assert length >= 0
360 -               var str = new FlatString.with_infos(self, length, 0, length - 1)
361 +               var str = new FlatString.with_infos(self, length)
362                 return str
363         end
364  
365 @@ -891,7 +879,7 @@ redef class NativeString
366                 var length = cstring_length
367                 var new_self = new NativeString(length + 1)
368                 copy_to(new_self, length, 0, 0)
369 -               var str = new FlatString.with_infos(new_self, length, 0, length - 1)
370 +               var str = new FlatString.with_infos(new_self, length)
371                 new_self[length] = 0u8
372                 str.real_items = new_self
373                 return str
374 @@ -952,7 +940,7 @@ redef class Int
375                 var ns = new NativeString(nslen + 1)
376                 ns[nslen] = 0u8
377                 native_int_to_s(ns, nslen + 1)
378 -               return new FlatString.full(ns, nslen, 0, nslen - 1, nslen)
379 +               return new FlatString.full(ns, nslen, nslen)
380         end
381  end
382  
383 @@ -989,13 +977,13 @@ redef class Array[E]
384                         var tmp = na[i]
385                         if tmp isa FlatString then
386                                 var tpl = tmp.bytelen
387 -                               tmp.items.copy_to(ns, tpl, tmp.first_byte, off)
388 +                               tmp.items.copy_to(ns, tpl, 0, off)
389                                 off += tpl
390                         else
391                                 for j in tmp.substrings do
392                                         var s = j.as(FlatString)
393                                         var slen = s.bytelen
394 -                                       s.items.copy_to(ns, slen, s.first_byte, off)
395 +                                       s.items.copy_to(ns, slen, 0, off)
396                                         off += slen
397                                 end
398                         end
399 @@ -1026,13 +1014,13 @@ redef class NativeArray[E]
400                         var tmp = na[i]
401                         if tmp isa FlatString then
402                                 var tpl = tmp.bytelen
403 -                               tmp.items.copy_to(ns, tpl, tmp.first_byte, off)
404 +                               tmp.items.copy_to(ns, tpl, 0, off)
405                                 off += tpl
406                         else
407                                 for j in tmp.substrings do
408                                         var s = j.as(FlatString)
409                                         var slen = s.bytelen
410 -                                       s.items.copy_to(ns, slen, s.first_byte, off)
411 +                                       s.items.copy_to(ns, slen, 0, off)
412                                         off += slen
413                                 end
414                         end
415 diff --git a/lib/standard/text/ropes.nit b/lib/standard/text/ropes.nit
416 index 2b3ff28..5adf321 100644
417 --- a/lib/standard/text/ropes.nit
418 +++ b/lib/standard/text/ropes.nit
419 @@ -95,7 +95,7 @@ private class Concat
420                 var off = 0
421                 for i in substrings do
422                         var ilen = i.bytelen
423 -                       i.as(FlatString).items.copy_to(ns, ilen, i.as(FlatString).first_byte, off)
424 +                       i.as(FlatString).items.copy_to(ns, ilen, 0, off)
425                         off += ilen
426                 end
427                 return ns
428 @@ -151,7 +151,7 @@ private class Concat
429         redef fun substring(from, len) do
430                 var llen = left.length
431                 if from < llen then
432 -                       if from + len < llen then return left.substring(from,len)
433 +                       if from + len < llen then return left.substring(from, len)
434                         var lsublen = llen - from
435                         return left.substring_from(from) + right.substring(0, len - lsublen)
436                 else
437 @@ -439,7 +439,7 @@ class RopeBuffer
438                 end
439                 if s isa FlatText then
440                         var oits = s.items
441 -                       var from = s.first_byte
442 +                       var from = 0
443                         var remsp = buf_size - rpos
444                         if slen <= remsp then
445                                 oits.copy_to(ns, slen, from, rpos)
446 @@ -474,7 +474,10 @@ class RopeBuffer
447         # the final String and re-allocates a new larger Buffer.
448         private fun dump_buffer do
449                 written = false
450 -               var nstr = new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
451 +               var nslen = rpos - dumped
452 +               var nns = new NativeString(nslen)
453 +               ns.copy_to(nns, nslen, dumped, 0)
454 +               var nstr = new FlatString.with_infos(nns, nslen)
455                 str += nstr
456                 var bs = buf_size
457                 bs = bs * 2
458 @@ -487,14 +490,21 @@ class RopeBuffer
459         # Similar to dump_buffer, but does not reallocate a new NativeString
460         private fun persist_buffer do
461                 if rpos == dumped then return
462 -               var nstr = new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
463 +               var nslen = rpos - dumped
464 +               var nns = new NativeString(nslen)
465 +               ns.copy_to(nns, nslen, dumped, 0)
466 +               var nstr = new FlatString.with_infos(nns, nslen)
467                 str += nstr
468                 dumped = rpos
469         end
470  
471         redef fun output do
472                 str.output
473 -               new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1).output
474 +               var nslen = rpos - dumped
475 +               var nns = new NativeString(nslen)
476 +               ns.copy_to(nns, nslen, dumped, 0)
477 +               var nstr = new FlatString.with_infos(nns, nslen)
478 +               nstr.output
479         end
480  
481         # Enlarge is useless here since the `Buffer`
482 @@ -516,7 +526,11 @@ class RopeBuffer
483         redef fun reverse do
484                 # Flush the buffer in order to only have to reverse `str`.
485                 if rpos > 0 and dumped != rpos then
486 -                       str += new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
487 +                       var nslen = rpos - dumped
488 +                       var nns = new NativeString(nslen)
489 +                       ns.copy_to(nns, nslen, dumped, 0)
490 +                       var nstr = new FlatString.with_infos(nns, nslen)
491 +                       str += nstr
492                         dumped = rpos
493                 end
494                 str = str.reversed
495 @@ -553,8 +567,8 @@ redef class FlatString
496                 if s isa FlatString then
497                         if nlen > maxlen then return new Concat(self, s)
498                         var mits = items
499 -                       var sifrom = s.first_byte
500 -                       var mifrom = first_byte
501 +                       var sifrom = 0
502 +                       var mifrom = 0
503                         var sits = s.items
504                         var ns = new NativeString(nlen + 1)
505                         mits.copy_to(ns, mlen, mifrom, 0)
506 @@ -602,7 +616,7 @@ private class RopeByteReverseIterator
507                 subs = new ReverseRopeSubstrings(root)
508                 var s = subs.item
509                 ns = s.items
510 -               pns = s.last_byte
511 +               pns = s.bytelen - 1
512         end
513  
514         init from(root: Concat, pos: Int) do
515 @@ -628,7 +642,7 @@ private class RopeByteReverseIterator
516                 if not subs.is_ok then return
517                 var s = subs.item
518                 ns = s.items
519 -               pns = s.last_byte
520 +               pns = s.bytelen - 1
521         end
522  end
523  
524 @@ -876,7 +890,8 @@ private class RopeBufSubstringIterator
525  
526         init(str: RopeBuffer) is old_style_init do
527                 iter = str.str.substrings
528 -               nsstr = new FlatString.with_infos(str.ns, str.rpos - str.dumped, str.dumped, str.rpos - 1)
529 +               str.persist_buffer
530 +               nsstr = "".as(FlatString)
531                 if str.length == 0 then nsstr_done = true
532         end
533  
534 diff --git a/lib/websocket/websocket.nit b/lib/websocket/websocket.nit
535 index 441df14..52b51a0 100644
536 --- a/lib/websocket/websocket.nit
537 +++ b/lib/websocket/websocket.nit
538 @@ -137,10 +137,10 @@ class WebsocketConnection
539                         ans_buffer.add(msg.length.to_b)
540                 end
541                 if msg isa FlatString then
542 -                       ans_buffer.append_ns_from(msg.items, msg.length, msg.first_byte)
543 +                       ans_buffer.append_ns_from(msg.items, msg.length, 0)
544                 else
545                         for i in msg.substrings do
546 -                               ans_buffer.append_ns_from(i.as(FlatString).items, i.length, i.as(FlatString).first_byte)
547 +                               ans_buffer.append_ns_from(i.as(FlatString).items, i.length, 0)
548                         end
549                 end
550                 return ans_buffer
551 -- 
552 1.9.1
553