lib/core: Fixed `FlatBuffer::append_substring`
[nit.git] / lib / core / text / flat.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # All the array-based text representations
12 module flat
13
14 intrude import abstract_text
15 intrude import native
16
17 `{
18 #include <stdio.h>
19 #include <string.h>
20 `}
21
# Iterator over the substrings of a flat text: yields one `FlatText`, once.
private class FlatSubstringsIter
	super Iterator[FlatText]

	# Element still to be yielded, `null` once it has been consumed
	var tgt: nullable FlatText

	redef fun is_ok do return tgt != null

	redef fun item
	do
		assert is_ok
		return tgt.as(not null)
	end

	redef fun next
	do
		tgt = null
	end
end
36
37 redef class FlatText
38
# Offset in `_items` of the first byte of `self`
protected fun first_byte: Int
do
	return 0
end

# Offset in `_items` of the last byte of `self`
protected fun last_byte: Int
do
	return first_byte + _byte_length - 1
end

# Char index most recently explored in the string (lookup cache)
var position: Int = 0

# Byte offset in `_items` associated with the cached `position`
var bytepos: Int = 0
50
# Byte offset in `_items` of the char located at char-index `index`
#
# The lookup starts from the nearest known anchor: the cached position,
# the first char or the last char of `self`.
# Except when `index` is the cached position, the cache
# (`_position` and `_bytepos`) is updated with the result.
fun char_to_byte_index(index: Int): Int do
	var native = _items
	var cur_char = _position
	var cur_byte = _bytepos
	var shift = index - cur_char

	# Same char as the cached one: the cache already holds the answer
	if shift == 0 then return cur_byte

	if shift == 1 then
		# Next char: skip over the bytes of the cached char
		if (native[cur_byte] & 0x80u8) == 0x00u8 then
			cur_byte += 1
		else
			cur_byte += native.length_of_char_at(cur_byte)
		end
		_bytepos = cur_byte
		_position = index
		return cur_byte
	end

	if shift == -1 then
		# Previous char: search backward from the byte preceding the cached char
		cur_byte = native.find_beginning_of_char_at(cur_byte - 1)
		_bytepos = cur_byte
		_position = index
		return cur_byte
	end

	# General case: pick the anchor with the smallest distance to `index`.
	# On ties the cache wins over the beginning, which wins over the end.
	var from_start = index
	var from_cache = (cur_char - index).abs
	var from_end = (_length - 1) - index
	var nearest = from_start
	if from_cache < nearest then nearest = from_cache
	if from_end < nearest then nearest = from_end

	var anchor_byte: Int
	var anchor_char: Int
	if nearest == from_cache then
		anchor_byte = cur_byte
		anchor_char = cur_char
	else if nearest == from_start then
		anchor_byte = first_byte
		anchor_char = 0
	else
		anchor_byte = native.find_beginning_of_char_at(last_byte)
		anchor_char = _length - 1
	end

	var res = native.char_to_byte_index_cached(index, anchor_char, anchor_byte)
	_position = index
	_bytepos = res
	return res
end
107
# Number of additional bytes required to HTML-escape `self`
#
# `<` and `>` each cost 3 more bytes;
# `&`, `"`, `'` and `/` each cost 4 more bytes.
fun chars_to_html_escape: Int do
	var native = _items
	var extra = 0
	for i in [first_byte .. last_byte] do
		var byte = native[i]
		if byte == 0x3Cu8 or byte == 0x3Eu8 then
			extra += 3
		else if byte == 0x26u8 or byte == 0x22u8 or byte == 0x27u8 or byte == 0x2Fu8 then
			extra += 4
		end
	end
	return extra
end
133
# HTML-escaped version of `self`
#
# The following bytes are replaced by an entity:
#
# * 0x3C (<) => &lt;
# * 0x3E (>) => &gt;
# * 0x26 (&) => &amp;
# * 0x22 (") => &#34;
# * 0x27 (') => &#39;
# * 0x2F (/) => &#47;
#
# When nothing has to be escaped, `to_s` is returned.
redef fun html_escape
do
	var extra = chars_to_html_escape
	if extra == 0 then return to_s

	var src = _items
	var total = extra + _byte_length
	var out = new NativeString(total)
	var w = 0
	for r in [first_byte .. last_byte] do
		var byte = src[r]
		if byte == 0x3Cu8 or byte == 0x3Eu8 then
			# `&lt;` or `&gt;`: only the second byte differs
			out[w] = 0x26u8
			out[w + 1] = if byte == 0x3Cu8 then 0x6Cu8 else 0x67u8
			out[w + 2] = 0x74u8
			out[w + 3] = 0x3Bu8
			w += 4
		else if byte == 0x26u8 then
			# `&amp;`
			out[w] = 0x26u8
			out[w + 1] = 0x61u8
			out[w + 2] = 0x6Du8
			out[w + 3] = 0x70u8
			out[w + 4] = 0x3Bu8
			w += 5
		else if byte == 0x22u8 or byte == 0x27u8 or byte == 0x2Fu8 then
			# Numeric entity `&#NN;` with NN = 34, 39 or 47
			out[w] = 0x26u8
			out[w + 1] = 0x23u8
			if byte == 0x22u8 then
				out[w + 2] = 0x33u8
				out[w + 3] = 0x34u8
			else if byte == 0x27u8 then
				out[w + 2] = 0x33u8
				out[w + 3] = 0x39u8
			else
				out[w + 2] = 0x34u8
				out[w + 3] = 0x37u8
			end
			out[w + 4] = 0x3Bu8
			w += 5
		else
			out[w] = byte
			w += 1
		end
	end
	return new FlatString.with_infos(out, total, 0)
end
205
# By escaping `self` to C, how many more bytes will be needed ?
#
# This enables a double-optimization in `escape_to_c` since if this
# method returns 0, then `self` does not need escaping and can be
# returned as-is
#
# The count mirrors exactly what `escape_to_c` writes:
#
# * `\n`, `\t`, `"`, `'` and `\` need 1 more byte (a leading backslash)
# * a `?` directly followed by one of `!()-/<=>` needs 1 more byte
# * any other byte below 32 needs 3 more bytes (octal form)
fun chars_to_escape_to_c: Int do
	var its = _items
	var max = last_byte
	var pos = first_byte
	var req_esc = 0
	while pos <= max do
		var c = its[pos]
		if c == 0x0Au8 then
			req_esc += 1
		else if c == 0x09u8 then
			req_esc += 1
		else if c == 0x22u8 then
			req_esc += 1
		else if c == 0x27u8 then
			req_esc += 1
		else if c == 0x5Cu8 then
			req_esc += 1
		else if c == 0x3Fu8 then
			var j = pos + 1
			# `j` is a byte offset in `_items`: it is bounded by
			# `last_byte`, not by the length in chars of `self`.
			if j <= max then
				var next = its[j]
				# We ignore `??'` because it will be escaped as `??\'`.
				if
					next == 0x21u8 or
					next == 0x28u8 or
					next == 0x29u8 or
					next == 0x2Du8 or
					next == 0x2Fu8 or
					next == 0x3Cu8 or
					next == 0x3Du8 or
					next == 0x3Eu8
				then req_esc += 1
			end
		else if c < 32u8 then
			req_esc += 3
		end
		pos += 1
	end
	return req_esc
end

# C-escaped version of `self`
#
# Returns `to_s` when `chars_to_escape_to_c` reports that nothing
# has to be escaped.
redef fun escape_to_c do
	var ln_extra = chars_to_escape_to_c
	if ln_extra == 0 then return self.to_s
	var its = _items
	var max = last_byte
	var nlen = _byte_length + ln_extra
	var nns = new NativeString(nlen)
	var pos = first_byte
	var opos = 0
	while pos <= max do
		var c = its[pos]
		# Special codes:
		#
		# Any byte with value < 32 is a control character
		# All their uses will be replaced by their octal
		# value in C.
		#
		# There are two exceptions however:
		#
		# * 0x09 => \t
		# * 0x0A => \n
		#
		# Aside from the code points above, the following are:
		#
		# * 0x22 => \"
		# * 0x27 => \'
		# * 0x5C => \\
		if c == 0x09u8 then
			nns[opos] = 0x5Cu8
			nns[opos + 1] = 0x74u8
			opos += 2
		else if c == 0x0Au8 then
			nns[opos] = 0x5Cu8
			nns[opos + 1] = 0x6Eu8
			opos += 2
		else if c == 0x22u8 then
			nns[opos] = 0x5Cu8
			nns[opos + 1] = 0x22u8
			opos += 2
		else if c == 0x27u8 then
			nns[opos] = 0x5Cu8
			nns[opos + 1] = 0x27u8
			opos += 2
		else if c == 0x5Cu8 then
			nns[opos] = 0x5Cu8
			nns[opos + 1] = 0x5Cu8
			opos += 2
		else if c == 0x3Fu8 then
			var j = pos + 1
			# Same bound as in `chars_to_escape_to_c`: `j` is a byte offset,
			# so both methods agree on the number of bytes to write.
			if j <= max then
				var next = its[j]
				# We ignore `??'` because it will be escaped as `??\'`.
				if
					next == 0x21u8 or
					next == 0x28u8 or
					next == 0x29u8 or
					next == 0x2Du8 or
					next == 0x2Fu8 or
					next == 0x3Cu8 or
					next == 0x3Du8 or
					next == 0x3Eu8
				then
					nns[opos] = 0x5Cu8
					opos += 1
				end
			end
			nns[opos] = 0x3Fu8
			opos += 1
		else if c < 32u8 then
			# Octal form `\0NN`: two octal digits are enough below 32
			nns[opos] = 0x5Cu8
			nns[opos + 1] = 0x30u8
			nns[opos + 2] = ((c & 0x38u8) >> 3) + 0x30u8
			nns[opos + 3] = (c & 0x07u8) + 0x30u8
			opos += 4
		else
			nns[opos] = c
			opos += 1
		end
		pos += 1
	end
	return nns.to_s_unsafe(nlen)
end
334
# Char at char-index `index`
#
# Accesses are heavily skewed (roughly 70% ask for the next char,
# 23% for the previous one and 7% for the same one), so these three
# cases are short-cut before falling back on the general lookup.
redef fun [](index) do
	var total = _length
	var its = _items
	var b = _bytepos
	var dpos = index - _position

	if dpos == 1 and index < total - 1 then
		# Moving forward over an ASCII char: the next char is one byte further
		if (its[b] & 0x80u8) == 0x00u8 then
			b += 1
			_bytepos = b
			_position = index
			# The char itself is read by the `dpos == 0` case below
			dpos = 0
		end
	else if dpos == -1 and index > 1 then
		# Moving backward: an ASCII byte right before the cache is the wanted char
		var prev = its[b - 1]
		if (prev & 0x80u8) == 0x00u8 then
			_bytepos = b - 1
			_position = index
			return prev.ascii
		end
	end

	if dpos == 0 then
		# The byte offset of the wanted char is known: read it now
		var lead = its[b]
		if (lead & 0x80u8) == 0x00u8 then return lead.ascii
		return its.char_at(b)
	end

	assert index >= 0 and index < total
	return fetch_char_at(index)
end
381
# Char located at char-index `index` in `self`
#
# WARNING: no bound-checking is performed, use at your own risks
fun fetch_char_at(index: Int): Char do
	var offset = char_to_byte_index(index)
	var native = _items
	var lead = native[offset]
	# A byte without the high bit set is a complete ASCII char
	if (lead & 0x80u8) == 0x00u8 then return lead.ascii
	return native.char_at(offset)
end
392
# Integer value of the hexadecimal digits of `self`
#
# `pos` (char index, 0 by default) is the first digit read and `ln`
# (rest of the string by default) is the number of bytes read from there.
#
#     assert "ff".to_hex == 255
redef fun to_hex(pos, ln) do
	if pos == null then pos = 0
	if ln == null then ln = length - pos
	var start = char_to_byte_index(pos)
	var stop = start + ln
	var native = _items
	var value = 0
	var i = start
	while i < stop do
		value = (value << 4) + native[i].ascii.from_hex
		i += 1
	end
	return value
end
409
# Copies `n` bytes of `self`, starting `src_off` bytes after its first byte,
# into `dst` at offset `dst_off`
redef fun copy_to_native(dst, n, src_off, dst_off)
do
	var src_start = first_byte + src_off
	_items.copy_to(dst, n, src_start, dst_off)
end
413 end
414
415 # Immutable strings of characters.
416 abstract class FlatString
417 super FlatText
418 super String
419
# Offset in `_items` of the first byte of `self` (inclusive)
redef var first_byte is noinit

# View of `self` as a sequence of chars
redef fun chars
do
	return new FlatStringCharView(self)
end

# View of `self` as a sequence of bytes
redef fun bytes
do
	return new FlatStringByteView(self)
end
426
# Null-terminated copy of the bytes of `self`
redef fun to_cstring do
	var n = _byte_length
	var copy = new NativeString(n + 1)
	copy[n] = 0u8
	_items.copy_to(copy, n, _first_byte, 0)
	return copy
end
434
# New string with the chars of `self` in reverse order
redef fun reversed do
	var buf = new FlatBuffer.with_capacity(_byte_length + 1)
	var i = _length
	while i > 0 do
		i -= 1
		buf.add fetch_char_at(i)
	end
	var res = buf.to_s.as(FlatString)
	res._length = _length
	return res
end
446
# Native view of `self` starting at its first byte (delegates to `NativeString::fast_cstring`)
redef fun fast_cstring
do
	return _items.fast_cstring(_first_byte)
end
448
# Substring of at most `count` chars starting at char-index `from`
#
# A negative `from` is clamped to 0 (shortening `count` accordingly)
# and the range is cut at the end of `self`.
# An empty range yields `""`.
redef fun substring(from, count)
do
	if count <= 0 then return ""

	var start = from
	var len = count
	if start < 0 then
		len += start
		if len <= 0 then return ""
		start = 0
	end

	var total = _length
	if start + len > total then len = total - start
	if len <= 0 then return ""

	return substring_impl(start, len, start + len - 1)
end
465
# Builds the substring of `count` chars going from char `from` to char `end_index`
#
# Bounds are expected to be already validated by the caller.
# The new string shares `_items` with `self`.
private fun substring_impl(from, count, end_index: Int): String do
	# Resolve first the bound that is closest to the cached position,
	# so that the second lookup starts from a nearby cache.
	var cache = _position
	var dfrom = (cache - from).abs
	var dend = (end_index - cache).abs

	var bytefrom: Int
	var byteto: Int
	if dfrom < dend then
		bytefrom = char_to_byte_index(from)
		byteto = char_to_byte_index(end_index)
	else
		byteto = char_to_byte_index(end_index)
		bytefrom = char_to_byte_index(from)
	end

	var its = _items
	# Move `byteto` to the last byte of the last char
	byteto += its.length_of_char_at(byteto) - 1

	var s = new FlatString.full(its, byteto - bytefrom + 1, bytefrom, count)
	return s
end
487
# The empty string, as a `FlatString`
redef fun empty
do
	return "".as(FlatString)
end
489
# New string where each char of `self` is replaced by its `to_upper`
redef fun to_upper
do
	var buf = new FlatBuffer.with_capacity(_byte_length + 1)
	for c in chars do buf.add c.to_upper
	return buf.to_s
end
504
# New string where each char of `self` is replaced by its `to_lower`
redef fun to_lower
do
	var buf = new FlatBuffer.with_capacity(_byte_length + 1)
	for c in chars do buf.add c.to_lower
	return buf.to_s
end
519
# Outputs each char of `self`, in order
redef fun output
do
	var it = chars.iterator
	while it.is_ok do
		it.item.output
		it.next
	end
end
524
525 ##################################################
526 # String Specific Methods #
527 ##################################################
528
# Low-level creation of a new string from its bytes only.
#
# The length in chars is computed from `items`; when it equals
# `byte_length` an `ASCIIFlatString` is built, else a `UnicodeFlatString`.
#
# `items` is used as is, without copy: aliasing issues are the
# responsibility of the caller.
private new with_infos(items: NativeString, byte_length, from: Int)
do
	var len = items.utf8_length(from, byte_length)
	if byte_length != len then
		return new UnicodeFlatString.full_data(items, byte_length, from, len)
	end
	return new ASCIIFlatString.full_data(items, byte_length, from, len)
end
539
# Low-level creation of a new string when all its data is known.
#
# An `ASCIIFlatString` is built when `byte_length` equals `length`,
# else a `UnicodeFlatString`.
#
# `items` is used as is, without copy: aliasing issues are the
# responsibility of the caller.
private new full(items: NativeString, byte_length, from, length: Int)
do
	if byte_length != length then
		return new UnicodeFlatString.full_data(items, byte_length, from, length)
	end
	return new ASCIIFlatString.full_data(items, byte_length, from, length)
end
549
# Byte-wise equality with another flat text
#
# Falls back on the inherited comparison when `other` is not a `FlatText`.
redef fun ==(other)
do
	if not other isa FlatText then return super
	if object_id == other.object_id then return true

	var count = _byte_length
	if count != other._byte_length then return false

	var mine = _items
	var theirs = other._items
	var i = _first_byte
	var j = other.first_byte
	var stop = i + count
	while i < stop do
		if mine[i] != theirs[j] then return false
		i += 1
		j += 1
	end
	return true
end
576
# Byte-wise lexicographic comparison with another flat text
#
# When one text is a prefix of the other, the shorter one (in bytes) is the smaller.
# Falls back on the inherited comparison when `other` is not a `FlatText`.
redef fun <(other)
do
	if not other isa FlatText then return super
	if object_id == other.object_id then return false

	var mine = _items
	var theirs = other._items
	var my_len = _byte_length
	var its_len = other.byte_length

	# Only the common prefix can be compared byte by byte
	var common = its_len
	if my_len < its_len then common = my_len

	var i = _first_byte
	var j = other.first_byte
	var stop = i + common
	while i < stop do
		var a = mine[i]
		var b = theirs[j]
		if a != b then return a < b
		i += 1
		j += 1
	end
	return my_len < its_len
end
606
# Concatenation of `self` and `o.to_s`
#
# The bytes of both operands are copied in a fresh `NativeString`
# with a trailing null byte, as `*` does.
# Aborts when `o.to_s` is not a `FlatText`.
redef fun +(o) do
	var s = o.to_s
	var slen = s.byte_length
	var mlen = _byte_length
	var nlen = mlen + slen
	var mits = _items
	var mifrom = _first_byte
	if s isa FlatText then
		var sits = s._items
		var sifrom = s.first_byte
		var ns = new NativeString(nlen + 1)
		mits.copy_to(ns, mlen, mifrom, 0)
		sits.copy_to(ns, slen, sifrom, mlen)
		# The extra byte was allocated for the terminator: set it
		ns[nlen] = 0u8
		return new FlatString.full(ns, nlen, 0, _length + s.length)
	else
		abort
	end
end
625
# String made of `i` consecutive copies of `self`
#
# The result owns a fresh null-terminated `NativeString`.
redef fun *(i) do
	var unit_bytes = _byte_length
	var total_bytes = unit_bytes * i
	var total_chars = _length * i
	var src = _items
	var start = _first_byte

	var ns = new NativeString(total_bytes + 1)
	ns[total_bytes] = 0u8

	var offset = 0
	var remaining = i
	while remaining > 0 do
		src.copy_to(ns, unit_bytes, start, offset)
		offset += unit_bytes
		remaining -= 1
	end
	return new FlatString.full(ns, total_bytes, 0, total_chars)
end
643
# djb2 hash of the bytes of `self`
#
# The value is computed once and then kept in `hash_cache`.
redef fun hash
do
	var cached = hash_cache
	if cached != null then return cached

	var native = _items
	var h = 5381
	for i in [_first_byte .. last_byte] do
		h = (h << 5) + h + native[i].to_i
	end

	hash_cache = h
	return h
end
664
# Iterator over the flat substrings of `self`: only `self`
redef fun substrings
do
	return new FlatSubstringsIter(self)
end
666 end
667
# Regular Nit strings, encoded in UTF-8
private class UnicodeFlatString
	super FlatString

	# Low-level initialization with all the data of the string
	#
	# The lookup cache starts on the first byte of the string.
	init full_data(items: NativeString, byte_length, from, length: Int) do
		_items = items
		_byte_length = byte_length
		_length = length
		_first_byte = from
		_bytepos = from
	end

	# Suffix of `self` starting at char-index `from`
	#
	# The suffix shares `items` with `self`.
	redef fun substring_from(from) do
		if from >= _length then return empty
		if from <= 0 then return self
		var start = char_to_byte_index(from)
		var skipped = start - _first_byte
		return new FlatString.full(items, byte_length - skipped, start, _length - from)
	end
end
689
# Strings whose characters are all ASCII
#
# Each char is exactly one byte wide, so a char index is turned into
# a byte offset by simply adding `_first_byte`.
private class ASCIIFlatString
	super FlatString

	# Low-level initialization with all the data of the string
	init full_data(items: NativeString, byte_length, from, length: Int) do
		_items = items
		_byte_length = byte_length
		_length = length
		_first_byte = from
		_bytepos = from
	end

	redef fun char_to_byte_index(index) do return index + _first_byte

	redef fun fetch_char_at(i) do return _items[i + _first_byte].ascii

	redef fun [](idx) do
		assert idx >= 0 and idx < _byte_length
		return _items[_first_byte + idx].ascii
	end

	# Substring of at most `count` chars starting at `from`
	#
	# The result shares `_items` with `self`.
	redef fun substring(from, count) do
		if count <= 0 then return ""
		var total = _length
		var start = from
		var len = count
		if start + len > total then len = total - start
		if len <= 0 then return ""
		if start < 0 then
			len += start
			if len <= 0 then return ""
			start = 0
		end
		return new ASCIIFlatString.full_data(_items, len, start + _first_byte, len)
	end

	redef fun substring_impl(from, count, end_index) do
		var start = from + _first_byte
		return new ASCIIFlatString.full_data(_items, count, start, count)
	end

	redef fun reversed do
		var buf = new FlatBuffer.with_capacity(_byte_length + 1)
		var i = _length
		while i > 0 do
			i -= 1
			buf.add self[i]
		end
		return buf.to_s.as(FlatString)
	end
end
741
# Backward iterator over the chars of a `FlatString`
private class FlatStringCharReverseIterator
	super IndexedIterator[Char]

	# String being traversed
	var target: FlatString

	# Index of the char currently pointed at
	var curr_pos: Int

	redef fun index do return curr_pos

	redef fun is_ok do return curr_pos >= 0

	redef fun item do return target[curr_pos]

	redef fun next
	do
		curr_pos -= 1
	end
end
758
# Forward iterator over the chars of a `FlatString`
private class FlatStringCharIterator
	super IndexedIterator[Char]

	# String being traversed
	var target: FlatString

	# Index of the last char of `target`, computed at initialization
	var max: Int is noautoinit

	# Index of the char currently pointed at
	var curr_pos: Int

	init
	do
		max = target._length - 1
	end

	redef fun index do return curr_pos

	redef fun is_ok do return curr_pos <= max

	redef fun item do return target[curr_pos]

	redef fun next
	do
		curr_pos += 1
	end
end
779
# View of a `FlatString` as a sequence of chars
private class FlatStringCharView
	super StringCharView

	redef type SELFTYPE: FlatString

	redef fun [](index)
	do
		return target[index]
	end

	redef fun iterator_from(start)
	do
		return new FlatStringCharIterator(target, start)
	end

	redef fun reverse_iterator_from(start)
	do
		return new FlatStringCharReverseIterator(target, start)
	end
end
792
# Backward iterator over the bytes of a `FlatString`
private class FlatStringByteReverseIterator
	super IndexedIterator[Byte]

	# String being traversed
	var target: FlatString

	# Bytes of `target`, fetched once at initialization
	var target_items: NativeString is noautoinit

	# Current position: given relative to the string, then stored as an offset in `target_items`
	var curr_pos: Int

	init
	do
		var str = target
		target_items = str._items
		# Turn the string-relative position into an offset in `target_items`
		curr_pos += str._first_byte
	end

	redef fun index do return curr_pos - target._first_byte

	redef fun is_ok do return curr_pos >= target._first_byte

	redef fun item do return target_items[curr_pos]

	redef fun next
	do
		curr_pos -= 1
	end
end
818
# Forward iterator over the bytes of a `FlatString`
private class FlatStringByteIterator
	super IndexedIterator[Byte]

	# String being traversed
	var target: FlatString

	# Bytes of `target`, fetched once at initialization
	var target_items: NativeString is noautoinit

	# Current position: given relative to the string, then stored as an offset in `target_items`
	var curr_pos: Int

	init
	do
		var str = target
		target_items = str._items
		# Turn the string-relative position into an offset in `target_items`
		curr_pos += str._first_byte
	end

	redef fun index do return curr_pos - target._first_byte

	redef fun is_ok do return curr_pos <= target.last_byte

	redef fun item do return target_items[curr_pos]

	redef fun next
	do
		curr_pos += 1
	end
end
844
# View of a `FlatString` as a sequence of bytes
private class FlatStringByteView
	super StringByteView

	redef type SELFTYPE: FlatString

	# Byte at `index`, relative to the first byte of the string
	redef fun [](index)
	do
		var str = _target
		# A valid index never goes past the last byte of the string
		assert index >= 0 and index < str._byte_length
		return str._items[str._first_byte + index]
	end

	redef fun iterator_from(start)
	do
		return new FlatStringByteIterator(target, start)
	end

	redef fun reverse_iterator_from(start)
	do
		return new FlatStringByteReverseIterator(target, start)
	end
end
865
redef class Buffer
	# New buffers are `FlatBuffer` instances
	redef new
	do
		return new FlatBuffer
	end

	# New buffers with an initial capacity are `FlatBuffer` instances
	redef new with_cap(i)
	do
		return new FlatBuffer.with_capacity(i)
	end
end
871
872 # Mutable strings of characters.
873 class FlatBuffer
874 super FlatText
875 super Buffer
876
877 redef fun chars do return new FlatBufferCharView(self)
878
879 redef fun bytes do return new FlatBufferByteView(self)
880
881 private var capacity = 0
882
883 redef fun fast_cstring do return _items.fast_cstring(0)
884
885 redef fun substrings do return new FlatSubstringsIter(self)
886
# Moves the content of the buffer into a newly allocated `NativeString`
#
# Called when an operation is about to modify the buffer while
# the copy-on-write flag `written` is set; the flag is cleared.
private fun reset do
	var fresh = new NativeString(capacity)
	var used = _byte_length
	if used != 0 then _items.copy_to(fresh, used, 0, 0)
	_items = fresh
	written = false
end
897
# Shifts the content of the buffer by `len` bytes to the right, starting at byte `from`
#
# When the storage is too small a larger one is allocated: the bytes
# located before `from` are carried over and `_items` is updated, so
# callers must re-read `_items` after the call.
#
# Internal only, does not modify _byte_length or length, this is the caller's responsibility
private fun rshift_bytes(from: Int, len: Int) do
	var oit = _items
	var nit = oit
	var bt = _byte_length
	var needed = bt + len
	if needed > capacity then
		var ncap = capacity * 2 + 2
		# Doubling may not be enough for a large shift
		if ncap < needed then ncap = needed
		nit = new NativeString(ncap)
		# Keep the bytes that are not shifted: `copy_to(dest, length, from, to)`
		if from > 0 then oit.copy_to(nit, from, 0, 0)
		capacity = ncap
		_items = nit
	end
	oit.copy_to(nit, bt - from, from, from + len)
end

# Shifts the content of the buffer by `len` bytes to the left, starting at `from`
#
# Internal only, does not modify _byte_length or length, this is the caller's responsibility
private fun lshift_bytes(from: Int, len: Int) do
	var it = _items
	it.copy_to(it, _byte_length - from, from, from - len)
end

# Replaces the char at char-index `index` by `item`
#
# `index == length` is accepted and appends `item`.
# When the UTF-8 lengths of the old and new chars differ, the bytes
# that follow are shifted accordingly.
redef fun []=(index, item)
do
	assert index >= 0 and index <= _length
	if written then reset
	if index == _length then
		add item
		return
	end
	var it = _items
	var ip = it.char_to_byte_index(index)
	var c = it.char_at(ip)
	var clen = c.u8char_len
	var itemlen = item.u8char_len
	var size_diff = itemlen - clen
	if size_diff > 0 then
		rshift_bytes(ip + clen, size_diff)
		# The shift may have reallocated the storage
		it = _items
	else if size_diff < 0 then
		lshift_bytes(ip + clen, -size_diff)
	end
	if size_diff != 0 and _position > index then
		# The bytes after `index` moved: re-anchor the lookup cache on
		# `index`, whose byte offset is still valid.
		_position = index
		_bytepos = ip
	end
	_byte_length += size_diff
	it.set_char_at(ip, item)
end
943
# Appends the char `c` at the end of the buffer
redef fun add(c)
do
	if written then reset
	var width = c.u8char_len
	var at = _byte_length
	enlarge(at + width)
	_items.set_char_at(at, c)
	_length += 1
	_byte_length = at + width
end
954
# Removes all the content of the buffer
#
# The char/byte lookup cache is reset too, since it refers to content
# that no longer exists.
# When the storage is shared (`written`), a fresh one of 16 bytes is allocated.
redef fun clear do
	_byte_length = 0
	_length = 0
	_position = 0
	_bytepos = 0
	if written then
		_capacity = 16
		reset
	end
end
963
# A new empty buffer
redef fun empty
do
	return new Buffer
end
965
# Ensures that the storage can hold more than `cap` bytes
#
# Nothing is done when the capacity is already at least `cap`.
# Else the capacity starts from at least 16 and is doubled while it is
# not greater than `cap`, and the content is copied into the new storage.
redef fun enlarge(cap)
do
	var target_cap = capacity
	if cap <= target_cap then return
	if target_cap < 16 then target_cap = 16
	while target_cap <= cap do target_cap *= 2

	# The content is copied in a private storage: the
	# copy-on-write flag can be cleared.
	written = false
	var fresh = new NativeString(target_cap)
	var used = _byte_length
	if used > 0 then _items.copy_to(fresh, used, 0, 0)
	_items = fresh
	capacity = target_cap
end
984
# String with the current content of the buffer
#
# The string shares the storage of the buffer: `written` is set so
# that modifying operations that check it copy the storage first.
redef fun to_s
do
	written = true
	var used = _byte_length
	if used == 0 then _items = new NativeString(1)
	return new FlatString.full(_items, used, 0, _length)
end
992
# Null-terminated copy of the bytes of the buffer
redef fun to_cstring
do
	var used = _byte_length
	var copy = new NativeString(used + 1)
	if _length > 0 then _items.copy_to(copy, used, 0, 0)
	copy[used] = 0u8
	return copy
end
1001
# Create a new empty buffer.
init
do
end

# Low-level creation of a new buffer from existing data.
#
# `items` is used as is, without copy, as the storage of the buffer:
# aliasing issues are the responsibility of the caller.
#
# If `items` is shared, `written` should be set to true after the
# creation so that a modification triggers a copy-on-write.
private init with_infos(items: NativeString, capacity, byte_length, length: Int)
do
	_items = items
	_length = length
	_byte_length = byte_length
	self.capacity = capacity
end
1019
# Create a new buffer with a copy of the content of `s`.
#
# Each flat substring of `s` is copied from its own first byte, one
# after the other in the new storage.
init from(s: Text)
do
	_items = new NativeString(s.byte_length)
	var offset = 0
	for i in s.substrings do
		var blen = i._byte_length
		i._items.copy_to(_items, blen, i.first_byte, offset)
		offset += blen
	end
	_byte_length = s.byte_length
	_length = s.length
	_capacity = _byte_length
end
1029
# Create a new empty buffer whose storage holds `cap` bytes.
#
# `cap` must not be negative.
init with_capacity(cap: Int)
do
	assert cap >= 0
	_byte_length = 0
	capacity = cap
	_items = new NativeString(cap)
end
1038
# Appends the content of `s` at the end of the buffer
#
# A flat text is copied in one go; any other text is appended
# substring by substring.
redef fun append(s)
do
	if s.is_empty then return
	var added = s.byte_length
	var total = _byte_length + added
	enlarge(total)
	if not s isa FlatText then
		for part in s.substrings do append part
		return
	end
	s._items.copy_to(_items, added, s.first_byte, _byte_length)
	_length += s.length
	_byte_length = total
end
1054
# Copies `len` chars of `self`, starting at char `start`, into `dest`
# starting at char `new_start`
fun copy(start: Int, len: Int, dest: Buffer, new_start: Int)
do
	var src = self.chars
	var dst = dest.chars
	for k in [0 .. len[ do
		dst[new_start + k] = src[start + k]
	end
end
1064
# New buffer with a copy of at most `count` chars of `self`, starting at char `from`
#
# A negative `from` is replaced by 0 and the range is cut at the end
# of `self`. `count` must not be negative.
redef fun substring(from, count)
do
	assert count >= 0
	var start = from
	if start < 0 then start = 0
	var len = count
	if start + len > _length then len = _length - start
	if len <= 0 then return new Buffer

	var src = _items
	var first = src.char_to_byte_index(start)
	var last = src.char_to_byte_index(start + len - 1)
	# Move `last` to the last byte of the last char
	last += src.char_at(last).u8char_len - 1

	var nbytes = last - first + 1
	var copy = new NativeString(nbytes)
	src.copy_to(copy, nbytes, first, 0)
	return new FlatBuffer.with_infos(copy, nbytes, nbytes, len)
end
1080
# Appends `length` chars of `s`, starting at char `from`, at the end of the buffer
#
# Flat texts are copied byte-wise; other texts are handled by the
# inherited implementation.
redef fun append_substring_impl(s, from, length) do
	if length <= 0 then return
	if not s isa FlatText then
		super
		return
	end
	var src = s._items
	var first = s.char_to_byte_index(from)
	var last = s.char_to_byte_index(from + length - 1)
	# Bytes from the first byte of the first char to the last byte of the last char
	var nbytes = last - first + src.char_at(last).u8char_len
	enlarge(_byte_length + nbytes)
	src.copy_to(_items, nbytes, first, _byte_length)
	_length += length
	_byte_length += nbytes
end
1096
# Reverses the order of the chars of the buffer, in place
#
# The chars are added in reverse order to a temporary buffer whose
# storage then replaces the one of `self`.
redef fun reverse
do
	written = false
	var flipped = new FlatBuffer.with_capacity(capacity)
	var it = chars.reverse_iterator
	while it.is_ok do
		flipped.add it.item
		it.next
	end
	_items = flipped._items
end
1104
# Repeats the content of the buffer, in place
#
# A snapshot of the current content is appended `repeats - 1` times.
redef fun times(repeats)
do
	var snapshot = new FlatString.full(_items, _byte_length, 0, _length)
	var k = 1
	while k < repeats do
		append(snapshot)
		k += 1
	end
end
1113
# Replaces, in place, each char of the buffer by its `to_upper`
redef fun upper
do
	if written then reset
	var i = 0
	while i < _length do
		self[i] = self[i].to_upper
		i += 1
	end
end
1119
# Replaces, in place, each char of the buffer by its `to_lower`
redef fun lower
do
	if written then reset
	var i = 0
	while i < _length do
		self[i] = self[i].to_lower
		i += 1
	end
end
1125 end
1126
# Backward iterator over the bytes of a `FlatBuffer`
private class FlatBufferByteReverseIterator
	super IndexedIterator[Byte]

	# Buffer being traversed
	var target: FlatBuffer

	# Storage of `target`, as it was when the iterator was created
	var target_items: NativeString is noautoinit

	# Offset of the byte currently pointed at
	var curr_pos: Int

	init
	do
		target_items = target._items
	end

	redef fun is_ok do return curr_pos >= 0

	redef fun index do return curr_pos

	redef fun item do return target_items[curr_pos]

	redef fun next
	do
		curr_pos -= 1
	end
end
1147
# View of a `FlatBuffer` as a sequence of bytes
private class FlatBufferByteView
	super BufferByteView

	redef type SELFTYPE: FlatBuffer

	# Byte at offset `index` in the storage of the buffer (no bound check here)
	redef fun [](index)
	do
		return target._items[index]
	end

	redef fun iterator_from(pos)
	do
		return new FlatBufferByteIterator(target, pos)
	end

	redef fun reverse_iterator_from(pos)
	do
		return new FlatBufferByteReverseIterator(target, pos)
	end
end
1160
# Forward iterator over the bytes of a `FlatBuffer`
private class FlatBufferByteIterator
	super IndexedIterator[Byte]

	# Buffer being traversed
	var target: FlatBuffer

	# Storage of `target`, as it was when the iterator was created
	var target_items: NativeString is noautoinit

	# Offset of the byte currently pointed at
	var curr_pos: Int

	init
	do
		target_items = target._items
	end

	redef fun is_ok do return curr_pos < target._byte_length

	redef fun index do return curr_pos

	redef fun item do return target_items[curr_pos]

	redef fun next
	do
		curr_pos += 1
	end
end
1181
# Backward iterator over the chars of a `FlatBuffer`
private class FlatBufferCharReverseIterator
	super IndexedIterator[Char]

	# Buffer being traversed
	var target: FlatBuffer

	# Index of the char currently pointed at
	var curr_pos: Int

	redef fun is_ok do return curr_pos >= 0

	redef fun index do return curr_pos

	redef fun item do return target[curr_pos]

	redef fun next
	do
		curr_pos -= 1
	end
end
1198
# View of a `FlatBuffer` as a mutable sequence of chars
private class FlatBufferCharView
	super BufferCharView

	redef type SELFTYPE: FlatBuffer

	redef fun [](index) do return target[index]

	# Replaces the char at `index`, or appends `item` when `index == length`
	redef fun []=(index, item)
	do
		assert index >= 0 and index <= length
		if index != length then
			target[index] = item
		else
			add(item)
		end
	end

	redef fun add(c)
	do
		target.add(c)
	end

	redef fun push(c)
	do
		target.add(c)
	end

	# Asks the underlying buffer to grow its storage for `cap` bytes
	fun enlarge(cap: Int)
	do
		target.enlarge(cap)
	end

	# Adds each char of `s` at the end of the buffer
	redef fun append(s)
	do
		var s_length = s.length
		# Grow the storage beforehand when it is smaller than `s`
		if target.capacity < s_length then enlarge(s_length + target._length)
		for c in s do target.add c
	end

	redef fun iterator_from(pos)
	do
		return new FlatBufferCharIterator(target, pos)
	end

	redef fun reverse_iterator_from(pos)
	do
		return new FlatBufferCharReverseIterator(target, pos)
	end
end
1243
# Forward iterator over the chars of a `FlatBuffer`
private class FlatBufferCharIterator
	super IndexedIterator[Char]

	# Buffer being traversed
	var target: FlatBuffer

	# Index of the last char of `target` when the iterator was created
	var max: Int is noautoinit

	# Index of the char currently pointed at
	var curr_pos: Int

	init
	do
		max = target._length - 1
	end

	redef fun is_ok do return curr_pos <= max

	redef fun index do return curr_pos

	redef fun item do return target[curr_pos]

	redef fun next
	do
		curr_pos += 1
	end
end
1264
1265 redef class NativeString
# String built from the bytes of `self`, up to `cstring_length`
redef fun to_s
do
	var len = cstring_length
	return to_s_with_length(len)
end
1270
# String built from the first `length` bytes of `self`, through `clean_utf8`
#
# `length` must not be negative.
redef fun to_s_with_length(length)
do
	assert length >= 0
	var res = clean_utf8(length)
	return res
end
1276
# String over the first `byte_length` bytes of `self`, known to hold `unilen` chars
#
# `self` is used as is, without copy.
redef fun to_s_full(byte_length, unilen)
do
	return new FlatString.full(self, byte_length, 0, unilen)
end
1280
# String over the first `len` bytes of `self` (`cstring_length` by default)
#
# `self` is used as is: no copy and no `clean_utf8` pass are done here.
redef fun to_s_unsafe(len)
do
	var nbytes = len
	if nbytes == null then nbytes = cstring_length
	return new FlatString.with_infos(self, nbytes, 0)
end
1285
1286 redef fun to_s_with_copy do return to_s_with_copy_and_length(cstring_length)
1287
1288 # Get a `String` from `length` bytes at `self` copied into Nit memory
1289 fun to_s_with_copy_and_length(length: Int): String
1290 do
1291 var r = clean_utf8(length)
1292 if r.items != self then return r
1293 var new_self = new NativeString(length + 1)
1294 copy_to(new_self, length, 0, 0)
1295 var str = new FlatString.with_infos(new_self, length, 0)
1296 new_self[length] = 0u8
1297 return str
1298 end
1299
1300 # Cleans a NativeString if necessary
1301 fun clean_utf8(len: Int): FlatString do
1302 var replacements: nullable Array[Int] = null
1303 var end_length = len
1304 var pos = 0
1305 var chr_ln = 0
1306 var rem = len
1307 while rem > 0 do
1308 while rem >= 4 do
1309 var i = fetch_4_chars(pos)
1310 if i & 0x80808080 != 0 then break
1311 pos += 4
1312 chr_ln += 4
1313 rem -= 4
1314 end
1315 if rem == 0 then break
1316 var b = self[pos]
1317 if b & 0x80u8 == 0x00u8 then
1318 pos += 1
1319 chr_ln += 1
1320 rem -= 1
1321 continue
1322 end
1323 var nxst = length_of_char_at(pos)
1324 var ok_st: Bool
1325 if nxst == 1 then
1326 ok_st = b & 0x80u8 == 0u8
1327 else if nxst == 2 then
1328 ok_st = b & 0xE0u8 == 0xC0u8
1329 else if nxst == 3 then
1330 ok_st = b & 0xF0u8 == 0xE0u8
1331 else
1332 ok_st = b & 0xF8u8 == 0xF0u8
1333 end
1334 if not ok_st then
1335 if replacements == null then replacements = new Array[Int]
1336 replacements.add pos
1337 end_length += 2
1338 pos += 1
1339 rem -= 1
1340 chr_ln += 1
1341 continue
1342 end
1343 var ok_c: Bool
1344 var c = char_at(pos)
1345 var cp = c.code_point
1346 if nxst == 1 then
1347 ok_c = cp >= 0 and cp <= 0x7F
1348 else if nxst == 2 then
1349 ok_c = cp >= 0x80 and cp <= 0x7FF
1350 else if nxst == 3 then
1351 ok_c = cp >= 0x800 and cp <= 0xFFFF
1352 ok_c = ok_c and not (cp >= 0xD800 and cp <= 0xDFFF) and cp != 0xFFFE and cp != 0xFFFF
1353 else
1354 ok_c = cp >= 0x10000 and cp <= 0x10FFFF
1355 end
1356 if not ok_c then
1357 if replacements == null then replacements = new Array[Int]
1358 replacements.add pos
1359 end_length += 2
1360 pos += 1
1361 chr_ln += 1
1362 rem -= 1
1363 continue
1364 end
1365 var clen = c.u8char_len
1366 pos += clen
1367 rem -= clen
1368 chr_ln += 1
1369 end
1370 var ret = self
1371 if end_length != len then
1372 ret = new NativeString(end_length)
1373 var old_repl = 0
1374 var off = 0
1375 var repls = replacements.as(not null)
1376 var r = repls.items.as(not null)
1377 var imax = repls.length
1378 for i in [0 .. imax[ do
1379 var repl_pos = r[i]
1380 var chkln = repl_pos - old_repl
1381 copy_to(ret, chkln, old_repl, off)
1382 off += chkln
1383 ret[off] = 0xEFu8
1384 ret[off + 1] = 0xBFu8
1385 ret[off + 2] = 0xBDu8
1386 old_repl = repl_pos + 1
1387 off += 3
1388 end
1389 copy_to(ret, len - old_repl, old_repl, off)
1390 end
1391 return new FlatString.full(ret, end_length, 0, chr_ln)
1392 end
1393
1394 # Sets the next bytes at position `pos` to the value of `c`, encoded in UTF-8
1395 #
1396 # Very unsafe, make sure to have room for this char prior to calling this function.
1397 private fun set_char_at(pos: Int, c: Char) do
1398 var cp = c.code_point
1399 if cp < 128 then
1400 self[pos] = cp.to_b
1401 return
1402 end
1403 var ln = c.u8char_len
1404 if ln == 2 then
1405 self[pos] = (0xC0 | ((cp & 0x7C0) >> 6)).to_b
1406 self[pos + 1] = (0x80 | (cp & 0x3F)).to_b
1407 else if ln == 3 then
1408 self[pos] = (0xE0 | ((cp & 0xF000) >> 12)).to_b
1409 self[pos + 1] = (0x80 | ((cp & 0xFC0) >> 6)).to_b
1410 self[pos + 2] = (0x80 | (cp & 0x3F)).to_b
1411 else if ln == 4 then
1412 self[pos] = (0xF0 | ((cp & 0x1C0000) >> 18)).to_b
1413 self[pos + 1] = (0x80 | ((cp & 0x3F000) >> 12)).to_b
1414 self[pos + 2] = (0x80 | ((cp & 0xFC0) >> 6)).to_b
1415 self[pos + 3] = (0x80 | (cp & 0x3F)).to_b
1416 end
1417 end
1418 end
1419
redef class Int
	# Signed, base 10 textual form of `self`
	#
	#     assert 1.to_s      == "1"
	#     assert (-123).to_s == "-123"
	redef fun to_s
	do
		# Shortcut for two very common values
		if self == 0 then
			return "0"
		else if self == 1 then
			return "1"
		end

		# `int_to_s_len` bytes of text, plus one byte for the trailing NUL
		var text_len = int_to_s_len
		var buf = new NativeString(text_len + 1)
		buf[text_len] = 0u8
		native_int_to_s(buf, text_len + 1)
		return new FlatString.full(buf, text_len, 0, text_len)
	end
end
1437
redef class Array[E]

	# Fast implementation
	#
	# The `to_s` of each non-null element is collected first, then all the
	# bytes are copied into a single `NativeString`. `null` elements are skipped.
	redef fun plain_to_s
	do
		var count = _length
		if count == 0 then return ""
		var elems = _items.as(not null)
		if count == 1 then
			var only = elems[0]
			if only == null then return ""
			return only.to_s
		end

		# Pass 1: gather the strings and sum their byte lengths
		var parts = new NativeArray[String](count)
		var kept = 0
		var total = 0
		var idx = 0
		while idx < count do
			var elem = elems[idx]
			idx += 1
			if elem == null then continue
			var txt = elem.to_s
			total += txt.byte_length
			parts[kept] = txt
			kept += 1
		end

		# Pass 2: copy every part into the NUL-terminated destination
		var dest = new NativeString(total + 1)
		dest[total] = 0u8
		var cursor = 0
		idx = 0
		while idx < kept do
			var part = parts[idx]
			idx += 1
			if part isa FlatString then
				var part_len = part._byte_length
				part._items.copy_to(dest, part_len, part._first_byte, cursor)
				cursor += part_len
				continue
			end
			# Not flat: copy its flat substrings one after the other
			for sub in part.substrings do
				var flat = sub.as(FlatString)
				var flat_len = flat._byte_length
				flat._items.copy_to(dest, flat_len, flat._first_byte, cursor)
				cursor += flat_len
			end
		end
		return new FlatString.with_infos(dest, total, 0)
	end
end
1487
redef class NativeArray[E]
	# Concatenation of all the strings of `self` into a single `FlatString`
	#
	# `self` must be a `NativeArray[String]`.
	redef fun native_to_s do
		assert self isa NativeArray[String]
		var parts = self
		var count = length

		# Pass 1: total number of bytes to store
		var total = 0
		var idx = 0
		while idx < count do
			total += parts[idx].byte_length
			idx += 1
		end

		# Pass 2: copy every part into the NUL-terminated destination
		var dest = new NativeString(total + 1)
		dest[total] = 0u8
		var cursor = 0
		idx = 0
		while idx < count do
			var part = parts[idx]
			idx += 1
			if part isa FlatString then
				var part_len = part._byte_length
				part._items.copy_to(dest, part_len, part._first_byte, cursor)
				cursor += part_len
				continue
			end
			# Not flat: copy its flat substrings one after the other
			for sub in part.substrings do
				var flat = sub.as(FlatString)
				var flat_len = flat._byte_length
				flat._items.copy_to(dest, flat_len, flat._first_byte, cursor)
				cursor += flat_len
			end
		end
		return new FlatString.with_infos(dest, total, 0)
	end
end
1524
redef class Map[K,V]
	# Concatenates the couples of `self` as `key couple_sep value`,
	# with `sep` between two couples
	#
	# A `null` key or value is written as `<null>`.
	# An empty map gives an empty string.
	redef fun join(sep, couple_sep)
	do
		if is_empty then return ""

		var out = new Buffer
		var it = iterator
		# False for the first couple only: it takes no leading separator
		var need_sep = false
		while it.is_ok do
			if need_sep then out.append(sep)
			var k = it.key
			var e = it.item
			out.append("{k or else "<null>"}{couple_sep}{e or else "<null>"}")
			need_sep = true
			it.next
		end
		return out.to_s
	end
end