Merge remote-tracking branch 'origin/master' into init_auto
[nit.git] / lib / core / text / flat.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # All the array-based text representations
12 module flat
13
14 intrude import abstract_text
15 intrude import native
16
17 `{
18 #include <stdio.h>
19 #include <string.h>
20 `}
21
# Iterator over the substrings of a flat text.
#
# A flat text is its own single substring: the iterator yields `tgt` once,
# then becomes exhausted.
private class FlatSubstringsIter
	super Iterator[FlatText]

	# Text still to be yielded, `null` once it has been consumed
	var tgt: nullable FlatText

	redef fun is_ok do return tgt != null

	redef fun item
	do
		var res = tgt
		assert res != null
		return res
	end

	redef fun next
	do
		tgt = null
	end
end
36
redef class FlatText

	# First byte of the NativeString
	protected fun first_byte: Int do return 0

	# Last byte of the NativeString
	#
	# Equals `first_byte - 1` when `self` holds no byte.
	protected fun last_byte: Int do return first_byte + _byte_length - 1

	# Cache of the latest position (char) explored in the string
	var position: Int = 0

	# Cached position (bytes) in the NativeString underlying the String
	var bytepos: Int = 0

	# Index of the character `index` in `_items`
	#
	# The search starts from the closest known point (beginning of the text,
	# end of the text or cached position) and the cache (`position`, `bytepos`)
	# is updated with the result.
	fun char_to_byte_index(index: Int): Int do
		var dpos = index - _position
		var b = _bytepos
		var its = _items

		# Shortcut: the character right after the cached one
		if dpos == 1 then
			if its[b] & 0x80u8 == 0x00u8 then
				b += 1
			else
				b += its.length_of_char_at(b)
			end
			_bytepos = b
			_position = index
			return b
		end
		# Shortcut: the character right before the cached one
		if dpos == -1 then
			b = its.find_beginning_of_char_at(b - 1)
			_bytepos = b
			_position = index
			return b
		end
		# Shortcut: the cached character itself
		if dpos == 0 then return b

		var ln = _length
		var pos = _position
		# Find best insertion point
		var delta_begin = index
		var delta_end = (ln - 1) - index
		var delta_cache = (pos - index).abs
		var min = delta_begin

		if delta_cache < min then min = delta_cache
		if delta_end < min then min = delta_end

		var ns_i: Int
		var my_i: Int

		if min == delta_cache then
			ns_i = _bytepos
			my_i = pos
		else if min == delta_begin then
			ns_i = first_byte
			my_i = 0
		else
			ns_i = its.find_beginning_of_char_at(last_byte)
			my_i = _length - 1
		end

		ns_i = its.char_to_byte_index_cached(index, my_i, ns_i)

		_position = index
		_bytepos = ns_i

		return ns_i
	end

	# By escaping `self` to HTML, how many more bytes will be needed ?
	#
	# `<` and `>` need 3 extra bytes each, `&`, `"`, `'` and `/` need 4.
	fun chars_to_html_escape: Int do
		var its = _items
		var max = last_byte
		var pos = first_byte
		var endlen = 0
		while pos <= max do
			var c = its[pos]
			if c == 0x3Cu8 then
				endlen += 3
			else if c == 0x3Eu8 then
				endlen += 3
			else if c == 0x26u8 then
				endlen += 4
			else if c == 0x22u8 then
				endlen += 4
			else if c == 0x27u8 then
				endlen += 4
			else if c == 0x2Fu8 then
				endlen += 4
			end
			pos += 1
		end
		return endlen
	end

	redef fun html_escape
	do
		var extra = chars_to_html_escape
		if extra == 0 then return to_s
		var its = _items
		var max = last_byte
		var pos = first_byte
		var nlen = extra + _byte_length
		var nits = new NativeString(nlen)
		var outpos = 0
		while pos <= max do
			var c = its[pos]
			# Special codes:
			# Some HTML characters are used as meta-data, they need
			# to be replaced by an HTML-Escaped equivalent
			#
			# * 0x3C (<) => &lt;
			# * 0x3E (>) => &gt;
			# * 0x26 (&) => &amp;
			# * 0x22 (") => &#34;
			# * 0x27 (') => &#39;
			# * 0x2F (/) => &#47;
			if c == 0x3Cu8 then
				nits[outpos] = 0x26u8
				nits[outpos + 1] = 0x6Cu8
				nits[outpos + 2] = 0x74u8
				nits[outpos + 3] = 0x3Bu8
				outpos += 4
			else if c == 0x3Eu8 then
				nits[outpos] = 0x26u8
				nits[outpos + 1] = 0x67u8
				nits[outpos + 2] = 0x74u8
				nits[outpos + 3] = 0x3Bu8
				outpos += 4
			else if c == 0x26u8 then
				nits[outpos] = 0x26u8
				nits[outpos + 1] = 0x61u8
				nits[outpos + 2] = 0x6Du8
				nits[outpos + 3] = 0x70u8
				nits[outpos + 4] = 0x3Bu8
				outpos += 5
			else if c == 0x22u8 then
				nits[outpos] = 0x26u8
				nits[outpos + 1] = 0x23u8
				nits[outpos + 2] = 0x33u8
				nits[outpos + 3] = 0x34u8
				nits[outpos + 4] = 0x3Bu8
				outpos += 5
			else if c == 0x27u8 then
				nits[outpos] = 0x26u8
				nits[outpos + 1] = 0x23u8
				nits[outpos + 2] = 0x33u8
				nits[outpos + 3] = 0x39u8
				nits[outpos + 4] = 0x3Bu8
				outpos += 5
			else if c == 0x2Fu8 then
				nits[outpos] = 0x26u8
				nits[outpos + 1] = 0x23u8
				nits[outpos + 2] = 0x34u8
				nits[outpos + 3] = 0x37u8
				nits[outpos + 4] = 0x3Bu8
				outpos += 5
			else
				nits[outpos] = c
				outpos += 1
			end
			pos += 1
		end
		var s = new FlatString.with_infos(nits, nlen, 0)
		return s
	end

	# By escaping `self` to C, how many more bytes will be needed ?
	#
	# This enables a double-optimization in `escape_to_c` since if this
	# method returns 0, then `self` does not need escaping and can be
	# returned as-is
	#
	# The rules applied here must stay strictly identical to those of
	# `escape_to_c`, which sizes its output array with this result.
	fun chars_to_escape_to_c: Int do
		var its = _items
		var max = last_byte
		var pos = first_byte
		var req_esc = 0
		while pos <= max do
			var c = its[pos]
			if c == 0x0Au8 then
				req_esc += 1
			else if c == 0x09u8 then
				req_esc += 1
			else if c == 0x22u8 then
				req_esc += 1
			else if c == 0x27u8 then
				req_esc += 1
			else if c == 0x5Cu8 then
				req_esc += 1
			else if c == 0x3Fu8 then
				var j = pos + 1
				# `j` is a byte index in `_items`: it is bounded by the last
				# byte of `self`, not by the number of characters.
				if j <= max then
					var next = its[j]
					# We ignore `??'` because it will be escaped as `??\'`.
					if
						next == 0x21u8 or
						next == 0x28u8 or
						next == 0x29u8 or
						next == 0x2Du8 or
						next == 0x2Fu8 or
						next == 0x3Cu8 or
						next == 0x3Du8 or
						next == 0x3Eu8
					then req_esc += 1
				end
			else if c < 32u8 then
				req_esc += 3
			end
			pos += 1
		end
		return req_esc
	end

	redef fun escape_to_c do
		var ln_extra = chars_to_escape_to_c
		if ln_extra == 0 then return self.to_s
		var its = _items
		var max = last_byte
		var nlen = _byte_length + ln_extra
		var nns = new NativeString(nlen)
		var pos = first_byte
		var opos = 0
		while pos <= max do
			var c = its[pos]
			# Special codes:
			#
			# Any byte with value < 32 is a control character
			# All their uses will be replaced by their octal
			# value in C.
			#
			# There are two exceptions however:
			#
			# * 0x09 => \t
			# * 0x0A => \n
			#
			# Aside from the code points above, the following are:
			#
			# * 0x22 => \"
			# * 0x27 => \'
			# * 0x5C => \\
			if c == 0x09u8 then
				nns[opos] = 0x5Cu8
				nns[opos + 1] = 0x74u8
				opos += 2
			else if c == 0x0Au8 then
				nns[opos] = 0x5Cu8
				nns[opos + 1] = 0x6Eu8
				opos += 2
			else if c == 0x22u8 then
				nns[opos] = 0x5Cu8
				nns[opos + 1] = 0x22u8
				opos += 2
			else if c == 0x27u8 then
				nns[opos] = 0x5Cu8
				nns[opos + 1] = 0x27u8
				opos += 2
			else if c == 0x5Cu8 then
				nns[opos] = 0x5Cu8
				nns[opos + 1] = 0x5Cu8
				opos += 2
			else if c == 0x3Fu8 then
				var j = pos + 1
				# Same bound as in `chars_to_escape_to_c`: `j` is a byte index.
				if j <= max then
					var next = its[j]
					# We ignore `??'` because it will be escaped as `??\'`.
					if
						next == 0x21u8 or
						next == 0x28u8 or
						next == 0x29u8 or
						next == 0x2Du8 or
						next == 0x2Fu8 or
						next == 0x3Cu8 or
						next == 0x3Du8 or
						next == 0x3Eu8
					then
						nns[opos] = 0x5Cu8
						opos += 1
					end
				end
				nns[opos] = 0x3Fu8
				opos += 1
			else if c < 32u8 then
				# Octal escape `\0NN`: 4 bytes written for 1 byte read
				nns[opos] = 0x5Cu8
				nns[opos + 1] = 0x30u8
				nns[opos + 2] = ((c & 0x38u8) >> 3) + 0x30u8
				nns[opos + 3] = (c & 0x07u8) + 0x30u8
				opos += 4
			else
				nns[opos] = c
				opos += 1
			end
			pos += 1
		end
		return nns.to_s_unsafe(nlen)
	end

	redef fun [](index) do
		var len = _length

		# Statistically:
		# * ~70% want the next char
		# * ~23% want the previous
		# * ~7% want the same char
		#
		# So it makes sense to shortcut early. And early is here.
		var dpos = index - _position
		var b = _bytepos
		if dpos == 1 and index < len - 1 then
			var its = _items
			var c = its[b]
			if c & 0x80u8 == 0x00u8 then
				# We want the next, and current is easy.
				# So next is easy to find!
				b += 1
				_position = index
				_bytepos = b
				# The rest will be done by `dpos==0` below.
				dpos = 0
			end
		else if dpos == -1 and index > 1 then
			var its = _items
			var c = its[b-1]
			if c & 0x80u8 == 0x00u8 then
				# We want the previous, and it is easy.
				b -= 1
				dpos = 0
				_position = index
				_bytepos = b
				return c.ascii
			end
		end
		if dpos == 0 then
			# We know what we want (+0 or +1) just get it now!
			var its = _items
			var c = its[b]
			if c & 0x80u8 == 0x00u8 then return c.ascii
			return items.char_at(b)
		end

		assert index >= 0 and index < len
		return fetch_char_at(index)
	end

	# Gets a `Char` at `index` in `self`
	#
	# WARNING: Use at your own risks as no bound-checking is done
	fun fetch_char_at(index: Int): Char do
		var i = char_to_byte_index(index)
		var items = _items
		var b = items[i]
		if b & 0x80u8 == 0x00u8 then return b.ascii
		return items.char_at(i)
	end

	# If `self` contains only digits and alpha <= 'f', return the corresponding integer.
	#
	#     assert "ff".to_hex == 255
	#
	# `pos` defaults to 0 and `ln` to the number of characters after `pos`.
	# Each digit is read as a single byte.
	redef fun to_hex(pos, ln) do
		var res = 0
		if pos == null then pos = 0
		if ln == null then ln = length - pos
		pos = char_to_byte_index(pos)
		var its = _items
		var max = pos + ln
		for i in [pos .. max[ do
			res <<= 4
			res += its[i].ascii.from_hex
		end
		return res
	end

	redef fun copy_to_native(dst, n, src_off, dst_off) do
		_items.copy_to(dst, n, first_byte + src_off, dst_off)
	end
end
414
# Immutable strings of characters.
abstract class FlatString
	super FlatText
	super String

	# Index at which `self` begins in `_items`, inclusively
	redef var first_byte is noinit

	redef fun chars do return new FlatStringCharView(self)

	redef fun bytes do return new FlatStringByteView(self)

	redef fun to_cstring do
		var blen = _byte_length
		# One more byte for the NUL terminator
		var new_items = new NativeString(blen + 1)
		_items.copy_to(new_items, blen, _first_byte, 0)
		new_items[blen] = 0u8
		return new_items
	end

	redef fun reversed do
		var b = new FlatBuffer.with_capacity(_byte_length + 1)
		var i = _length - 1
		while i >= 0 do
			b.add self.fetch_char_at(i)
			i -= 1
		end
		var s = b.to_s.as(FlatString)
		s._length = self._length
		return s
	end

	redef fun fast_cstring do return _items.fast_cstring(_first_byte)

	redef fun substring(from, count)
	do
		if count <= 0 then return ""

		# A negative start consumes part of `count`
		if from < 0 then
			count += from
			if count <= 0 then return ""
			from = 0
		end

		# Clip to the end of the string
		var ln = _length
		if (count + from) > ln then count = ln - from
		if count <= 0 then return ""
		var end_index = from + count - 1
		return substring_impl(from, count, end_index)
	end

	# Builds the substring of `count` characters going from `from` to `end_index`
	#
	# The arguments are expected to be already clipped by `substring`.
	private fun substring_impl(from, count, end_index: Int): String do
		var cache = _position
		var dfrom = (cache - from).abs
		# NOTE(review): this is the distance between `from` and `end_index`,
		# not between the cache and `end_index` -- confirm the intended heuristic.
		# It only changes the order of the two lookups, not their results.
		var dend = (end_index - from).abs

		var bytefrom: Int
		var byteto: Int
		if dfrom < dend then
			bytefrom = char_to_byte_index(from)
			byteto = char_to_byte_index(end_index)
		else
			byteto = char_to_byte_index(end_index)
			bytefrom = char_to_byte_index(from)
		end

		var its = _items
		# Include every byte of the last character
		byteto += its.length_of_char_at(byteto) - 1

		var s = new FlatString.full(its, byteto - bytefrom + 1, bytefrom, count)
		return s
	end

	redef fun empty do return "".as(FlatString)

	redef fun to_upper
	do
		var outstr = new FlatBuffer.with_capacity(self._byte_length + 1)

		var mylen = _length
		var pos = 0

		while pos < mylen do
			# Direct access: `chars` would allocate a new view at each iteration
			outstr.add(self[pos].to_upper)
			pos += 1
		end

		return outstr.to_s
	end

	redef fun to_lower
	do
		var outstr = new FlatBuffer.with_capacity(self._byte_length + 1)

		var mylen = _length
		var pos = 0

		while pos < mylen do
			# Direct access: `chars` would allocate a new view at each iteration
			outstr.add(self[pos].to_lower)
			pos += 1
		end

		return outstr.to_s
	end

	redef fun output
	do
		for i in chars do i.output
	end

	##################################################
	#              String Specific Methods           #
	##################################################

	# Low-level creation of a new string with minimal data.
	#
	# `_items` will be used as is, without copy, to retrieve the characters of the string.
	# Aliasing issues is the responsibility of the caller.
	#
	# The number of characters is computed from the bytes; the result is an
	# `ASCIIFlatString` when it equals `byte_length`, an `UnicodeFlatString` otherwise.
	private new with_infos(items: NativeString, byte_length, from: Int)
	do
		var len = items.utf8_length(from, byte_length)
		if byte_length == len then return new ASCIIFlatString.full_data(items, byte_length, from, len)
		return new UnicodeFlatString.full_data(items, byte_length, from, len)
	end

	# Low-level creation of a new string with all the data.
	#
	# `_items` will be used as is, without copy, to retrieve the characters of the string.
	# Aliasing issues is the responsibility of the caller.
	private new full(items: NativeString, byte_length, from, length: Int)
	do
		if byte_length == length then return new ASCIIFlatString.full_data(items, byte_length, from, length)
		return new UnicodeFlatString.full_data(items, byte_length, from, length)
	end

	redef fun ==(other)
	do
		if not other isa FlatText then return super

		if self.object_id == other.object_id then return true

		var my_length = _byte_length

		if other._byte_length != my_length then return false

		var my_index = _first_byte
		var its_index = other.first_byte

		var last_iteration = my_index + my_length

		var its_items = other._items
		var my_items = self._items

		# Byte-wise comparison of the two texts
		while my_index < last_iteration do
			if my_items[my_index] != its_items[its_index] then return false
			my_index += 1
			its_index += 1
		end

		return true
	end

	redef fun <(other)
	do
		if not other isa FlatText then return super

		if self.object_id == other.object_id then return false

		var myits = _items
		var itsits = other._items

		var mbt = _byte_length
		var obt = other.byte_length

		var minln = if mbt < obt then mbt else obt
		var mst = _first_byte
		var ost = other.first_byte

		# Byte-wise comparison over the common prefix length
		for i in [0 .. minln[ do
			var my_curr_char = myits[mst]
			var its_curr_char = itsits[ost]

			if my_curr_char > its_curr_char then return false
			if my_curr_char < its_curr_char then return true

			mst += 1
			ost += 1
		end

		# Same prefix: the shorter text comes first
		return mbt < obt
	end

	redef fun +(o) do
		var s = o.to_s
		var slen = s.byte_length
		var mlen = _byte_length
		var nlen = mlen + slen
		var mits = _items
		var mifrom = _first_byte
		if s isa FlatText then
			var sits = s._items
			var sifrom = s.first_byte
			var ns = new NativeString(nlen + 1)
			mits.copy_to(ns, mlen, mifrom, 0)
			sits.copy_to(ns, slen, sifrom, mlen)
			# The extra byte is the NUL terminator, as in `*` and `to_cstring`
			ns[nlen] = 0u8
			return new FlatString.full(ns, nlen, 0, _length + s.length)
		else
			abort
		end
	end

	redef fun *(i) do
		var mybtlen = _byte_length
		var new_byte_length = mybtlen * i
		var mylen = _length
		var newlen = mylen * i
		var its = _items
		var fb = _first_byte
		var ns = new NativeString(new_byte_length + 1)
		ns[new_byte_length] = 0u8
		var offset = 0
		while i > 0 do
			its.copy_to(ns, mybtlen, fb, offset)
			offset += mybtlen
			i -= 1
		end
		return new FlatString.full(ns, new_byte_length, 0, newlen)
	end

	redef fun hash
	do
		if hash_cache == null then
			# djb2 hash algorithm
			var h = 5381
			var i = _first_byte

			var my_items = _items
			var max = last_byte

			while i <= max do
				h = (h << 5) + h + my_items[i].to_i
				i += 1
			end

			hash_cache = h
		end

		return hash_cache.as(not null)
	end

	redef fun substrings do return new FlatSubstringsIter(self)
end
667
# Regular Nit UTF-8 strings
private class UnicodeFlatString
	super FlatString

	# Low-level constructor: `items` is used as is, without copy.
	#
	# The string covers `byte_length` bytes of `items` from byte `from`
	# and is declared to hold `length` characters.
	init full_data(items: NativeString, byte_length, from, length: Int)
	do
		_items = items
		_byte_length = byte_length
		_length = length
		_first_byte = from
		# The byte cache starts at the beginning of the string
		_bytepos = from
	end

	redef fun substring_from(from)
	do
		var len = _length
		if from >= len then return empty
		if from <= 0 then return self
		var start = char_to_byte_index(from)
		# Bytes left once the skipped prefix is removed
		var remaining = byte_length - (start - _first_byte)
		return new FlatString.full(items, remaining, start, len - from)
	end
end
689
# Special cases of String where all the characters are ASCII-based
#
# Optimizes access operations to O(1) complexity.
private class ASCIIFlatString
	super FlatString

	# Low-level constructor: `items` is used as is, without copy.
	#
	# The string covers `byte_length` bytes of `items` from byte `from`
	# and is declared to hold `length` characters.
	init full_data(items: NativeString, byte_length, from, length: Int)
	do
		_items = items
		_byte_length = byte_length
		_length = length
		_first_byte = from
		_bytepos = from
	end

	# One character is one byte: the character index is the byte offset
	redef fun char_to_byte_index(index) do return _first_byte + index

	redef fun fetch_char_at(i) do return _items[_first_byte + i].ascii

	redef fun [](idx)
	do
		assert idx >= 0 and idx < _byte_length
		return _items[_first_byte + idx].ascii
	end

	redef fun substring(from, count)
	do
		if count <= 0 then return ""
		# Clip to the end of the string
		var overflow = from + count - _length
		if overflow > 0 then
			count -= overflow
			if count <= 0 then return ""
		end
		# A negative start consumes part of `count`
		if from < 0 then
			count += from
			if count <= 0 then return ""
			from = 0
		end
		return new ASCIIFlatString.full_data(_items, count, _first_byte + from, count)
	end

	redef fun substring_impl(from, count, end_index)
	do
		return new ASCIIFlatString.full_data(_items, count, _first_byte + from, count)
	end

	redef fun reversed
	do
		var buf = new FlatBuffer.with_capacity(_byte_length + 1)
		var i = _length
		while i > 0 do
			i -= 1
			buf.add self[i]
		end
		return buf.to_s.as(FlatString)
	end
end
741
# Iterator over the characters of a `FlatString`, from `curr_pos` down to index 0
private class FlatStringCharReverseIterator
	super IndexedIterator[Char]

	# String being iterated
	var target: FlatString

	# Index (in characters) of the current item
	var curr_pos: Int

	redef fun index do return curr_pos

	redef fun is_ok do return curr_pos >= 0

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
758
# Iterator over the characters of a `FlatString`, from `curr_pos` up to the last one
private class FlatStringCharIterator
	super IndexedIterator[Char]

	# String being iterated
	var target: FlatString

	# Index of the last character of `target`, computed at creation
	var max: Int is noautoinit

	# Index (in characters) of the current item
	var curr_pos: Int

	init
	do
		max = target._length - 1
	end

	redef fun index do return curr_pos

	redef fun is_ok do return curr_pos <= max

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun next
	do
		curr_pos += 1
	end
end
779
# View of a `FlatString` as a sequence of characters
private class FlatStringCharView
	super StringCharView

	redef type SELFTYPE: FlatString

	redef fun [](index)
	do
		return target[index]
	end

	redef fun iterator_from(start)
	do
		return new FlatStringCharIterator(target, start)
	end

	redef fun reverse_iterator_from(start)
	do
		return new FlatStringCharReverseIterator(target, start)
	end
end
792
# Iterator over the bytes of a `FlatString`, from a given byte down to the first one
private class FlatStringByteReverseIterator
	super IndexedIterator[Byte]

	# String being iterated
	var target: FlatString

	# Native array of `target`, read once at creation
	var target_items: NativeString is noautoinit

	# Position of the current byte.
	#
	# Given relative to the string, then turned into an offset in `target_items` by `init`.
	var curr_pos: Int

	init
	do
		target_items = target._items
		curr_pos = curr_pos + target._first_byte
	end

	redef fun index do return curr_pos - target._first_byte

	redef fun is_ok do return curr_pos >= target._first_byte

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
818
# Iterator over the bytes of a `FlatString`, from a given byte up to the last one
private class FlatStringByteIterator
	super IndexedIterator[Byte]

	# String being iterated
	var target: FlatString

	# Native array of `target`, read once at creation
	var target_items: NativeString is noautoinit

	# Position of the current byte.
	#
	# Given relative to the string, then turned into an offset in `target_items` by `init`.
	var curr_pos: Int

	init
	do
		target_items = target._items
		curr_pos = curr_pos + target._first_byte
	end

	redef fun index do return curr_pos - target._first_byte

	redef fun is_ok do return curr_pos <= target.last_byte

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun next
	do
		curr_pos += 1
	end
end
844
# View of a `FlatString` as a sequence of bytes
private class FlatStringByteView
	super StringByteView

	redef type SELFTYPE: FlatString

	redef fun [](index)
	do
		var str = _target
		# `index` is relative to the string: it must fall inside its bytes
		assert index >= 0 and index < str._byte_length
		return str._items[str._first_byte + index]
	end

	redef fun iterator_from(start)
	do
		return new FlatStringByteIterator(target, start)
	end

	redef fun reverse_iterator_from(start)
	do
		return new FlatStringByteReverseIterator(target, start)
	end
end
865
redef class Buffer
	# A new `Buffer` is a `FlatBuffer`
	redef new
	do
		return new FlatBuffer
	end

	# A new `Buffer` with an initial capacity is a `FlatBuffer` with that capacity
	redef new with_cap(i)
	do
		return new FlatBuffer.with_capacity(i)
	end
end
871
# Mutable strings of characters.
class FlatBuffer
	super FlatText
	super Buffer

	redef fun chars do return new FlatBufferCharView(self)

	redef fun bytes do return new FlatBufferByteView(self)

	# Number of bytes `_items` is considered able to hold
	private var capacity = 0

	redef fun fast_cstring do return _items.fast_cstring(0)

	redef fun substrings do return new FlatSubstringsIter(self)

	# Re-copies the `NativeString` into a new one and sets it as the new `Buffer`
	#
	# This happens when an operation modifies the current `Buffer` and
	# the Copy-On-Write flag `written` is set at true.
	private fun reset do
		var nns = new NativeString(capacity)
		if _byte_length != 0 then _items.copy_to(nns, _byte_length, 0, 0)
		_items = nns
		written = false
	end

	# Shifts the content of the buffer by `len` bytes to the right, starting at byte `from`
	#
	# When the shifted content does not fit in `capacity`, a larger array is
	# allocated and becomes the new `_items`: callers must re-read `_items`
	# after the call.
	#
	# Internal only, does not modify _byte_length or length, this is the caller's responsibility
	private fun rshift_bytes(from: Int, len: Int) do
		var oit = _items
		var nit = oit
		var bt = _byte_length
		var needed = bt + len
		if needed > capacity then
			var ncap = capacity * 2 + 2
			if ncap < needed then ncap = needed
			capacity = ncap
			nit = new NativeString(ncap)
			# Keep the bytes located before the shifted area
			oit.copy_to(nit, from, 0, 0)
			_items = nit
		end
		oit.copy_to(nit, bt - from, from, from + len)
	end

	# Shifts the content of the buffer by `len` bytes to the left, starting at `from`
	#
	# Internal only, does not modify _byte_length or length, this is the caller's responsibility
	private fun lshift_bytes(from: Int, len: Int) do
		var it = _items
		it.copy_to(it, _byte_length - from, from, from - len)
	end

	redef fun []=(index, item)
	do
		assert index >= 0 and index <= _length
		if written then reset
		if index == _length then
			add item
			return
		end
		var it = _items
		var ip = it.char_to_byte_index(index)
		var c = it.char_at(ip)
		var clen = c.u8char_len
		var itemlen = item.u8char_len
		var size_diff = itemlen - clen
		if size_diff > 0 then
			rshift_bytes(ip + clen, size_diff)
			# The content may have moved to a larger array
			it = _items
		else if size_diff < 0 then
			lshift_bytes(ip + clen, -size_diff)
		end
		_byte_length += size_diff
		# Bytes after `index` moved: keep the cached byte position on its character
		if index < _position then _bytepos += size_diff
		it.set_char_at(ip, item)
	end

	redef fun add(c)
	do
		if written then reset
		var clen = c.u8char_len
		var bt = _byte_length
		enlarge(bt + clen)
		_items.set_char_at(bt, c)
		_byte_length += clen
		_length += 1
	end

	redef fun clear do
		_byte_length = 0
		_length = 0
		# The cached position designates content that no longer exists
		_position = 0
		_bytepos = 0
		if written then
			_capacity = 16
			reset
		end
	end

	redef fun empty do return new Buffer

	redef fun enlarge(cap)
	do
		var c = capacity
		if cap <= c then return
		if c <= 16 then c = 16
		while c <= cap do c = c * 2
		# The COW flag can be set at false here, since
		# it does a copy of the current `Buffer`
		written = false
		var bln = _byte_length
		var a = new NativeString(c)
		if bln > 0 then _items.copy_to(a, bln, 0, 0)
		_items = a
		capacity = c
	end

	redef fun to_s
	do
		written = true
		var bln = _byte_length
		if bln == 0 then
			# The empty string gets its own one-byte array. `capacity` has to
			# follow, otherwise a later `append` would rely on the previous
			# capacity and write past the end of this array.
			_items = new NativeString(1)
			capacity = 1
		end
		return new FlatString.full(_items, bln, 0, _length)
	end

	redef fun to_cstring
	do
		var bln = _byte_length
		var new_native = new NativeString(bln + 1)
		new_native[bln] = 0u8
		if _length > 0 then _items.copy_to(new_native, bln, 0, 0)
		return new_native
	end

	# Create a new empty string.
	init do end

	# Low-level creation a new buffer with given data.
	#
	# `_items` will be used as is, without copy, to store the characters of the buffer.
	# Aliasing issues is the responsibility of the caller.
	#
	# If `_items` is shared, `written` should be set to true after the creation
	# so that a modification will do a copy-on-write.
	private init with_infos(items: NativeString, capacity, byte_length, length: Int)
	do
		self._items = items
		self.capacity = capacity
		self._byte_length = byte_length
		self._length = length
	end

	# Create a new string copied from `s`.
	init from(s: Text)
	do
		_items = new NativeString(s.byte_length)
		# Substrings are laid out one after the other, each one read from
		# its own offset in its own array.
		var offset = 0
		for i in s.substrings do
			var ln = i._byte_length
			i._items.copy_to(_items, ln, i.first_byte, offset)
			offset += ln
		end
		_byte_length = s.byte_length
		_length = s.length
		_capacity = _byte_length
	end

	# Create a new empty string with a given capacity.
	init with_capacity(cap: Int)
	do
		assert cap >= 0
		_items = new NativeString(cap)
		capacity = cap
		_byte_length = 0
	end

	redef fun append(s)
	do
		if s.is_empty then return
		var sl = s.byte_length
		var nln = _byte_length + sl
		enlarge(nln)
		if s isa FlatText then
			s._items.copy_to(_items, sl, s.first_byte, _byte_length)
		else
			# Each substring is flat and updates the lengths by itself
			for i in s.substrings do append i
			return
		end
		_byte_length = nln
		_length += s.length
	end

	# Copies the content of self in `dest`
	#
	# `len` characters are copied, from index `start` of `self` to index
	# `new_start` of `dest`.
	fun copy(start: Int, len: Int, dest: Buffer, new_start: Int)
	do
		var self_chars = self.chars
		var dest_chars = dest.chars
		for i in [0..len-1] do
			dest_chars[new_start+i] = self_chars[start+i]
		end
	end

	redef fun substring(from, count)
	do
		assert count >= 0
		if from < 0 then from = 0
		if (from + count) > _length then count = _length - from
		if count <= 0 then return new Buffer
		var its = _items
		var bytefrom = its.char_to_byte_index(from)
		var byteto = its.char_to_byte_index(count + from - 1)
		# Include every byte of the last character
		byteto += its.char_at(byteto).u8char_len - 1
		var byte_length = byteto - bytefrom + 1
		var r_items = new NativeString(byte_length)
		its.copy_to(r_items, byte_length, bytefrom, 0)
		return new FlatBuffer.with_infos(r_items, byte_length, byte_length, count)
	end

	redef fun append_substring_impl(s, from, length) do
		if length <= 0 then return
		if not s isa FlatText then
			super
			return
		end
		var sits = s._items
		var bytest = s.char_to_byte_index(from)
		var bytend = s.char_to_byte_index(from + length - 1)
		# `bytend` is the first byte of the last character: all its bytes are copied
		var btln = bytend - bytest + sits.length_of_char_at(bytend)
		enlarge(btln + _byte_length)
		sits.copy_to(_items, btln, bytest, _byte_length)
		_byte_length += btln
		_length += length
	end

	redef fun reverse
	do
		written = false
		var ns = new FlatBuffer.with_capacity(capacity)
		for i in chars.reverse_iterator do ns.add i
		_items = ns._items
		# Bytes were reordered: the cached byte position is meaningless now
		_position = 0
		_bytepos = 0
	end

	redef fun times(repeats)
	do
		var bln = _byte_length
		# Snapshot of the current content, appended `repeats - 1` times
		var x = new FlatString.full(_items, bln, 0, _length)
		for i in [1 .. repeats[ do
			append(x)
		end
	end

	redef fun upper
	do
		if written then reset
		for i in [0 .. _length[ do self[i] = self[i].to_upper
	end

	redef fun lower
	do
		if written then reset
		for i in [0 .. _length[ do self[i] = self[i].to_lower
	end
end
1125
# Iterator over the bytes of a `FlatBuffer`, from `curr_pos` down to byte 0
private class FlatBufferByteReverseIterator
	super IndexedIterator[Byte]

	# Buffer being iterated
	var target: FlatBuffer

	# Native array of `target`, read once at creation
	var target_items: NativeString is noautoinit

	# Offset of the current byte
	var curr_pos: Int

	init
	do
		target_items = target._items
	end

	redef fun is_ok do return curr_pos >= 0

	redef fun index do return curr_pos

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
1146
# View of a `FlatBuffer` as a sequence of bytes
private class FlatBufferByteView
	super BufferByteView

	redef type SELFTYPE: FlatBuffer

	# Byte at offset `index` of the native array of the buffer (no bound check)
	redef fun [](index)
	do
		return target._items[index]
	end

	redef fun iterator_from(pos)
	do
		return new FlatBufferByteIterator(target, pos)
	end

	redef fun reverse_iterator_from(pos)
	do
		return new FlatBufferByteReverseIterator(target, pos)
	end
end
1159
# Iterator over the bytes of a `FlatBuffer`, from `curr_pos` up to the last byte
private class FlatBufferByteIterator
	super IndexedIterator[Byte]

	# Buffer being iterated
	var target: FlatBuffer

	# Native array of `target`, read once at creation
	var target_items: NativeString is noautoinit

	# Offset of the current byte
	var curr_pos: Int

	init
	do
		target_items = target._items
	end

	# The bound is read from `target` at each call
	redef fun is_ok do return curr_pos < target._byte_length

	redef fun index do return curr_pos

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun next
	do
		curr_pos += 1
	end
end
1180
# Iterator over the characters of a `FlatBuffer`, from `curr_pos` down to index 0
private class FlatBufferCharReverseIterator
	super IndexedIterator[Char]

	# Buffer being iterated
	var target: FlatBuffer

	# Index (in characters) of the current item
	var curr_pos: Int

	redef fun is_ok do return curr_pos >= 0

	redef fun index do return curr_pos

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
1197
# View of a `FlatBuffer` as a mutable sequence of characters
private class FlatBufferCharView
	super BufferCharView

	redef type SELFTYPE: FlatBuffer

	redef fun [](index)
	do
		return target[index]
	end

	# Replaces the character at `index`, or appends `item` when `index == length`
	redef fun []=(index, item)
	do
		assert index >= 0 and index <= length
		if index != length then
			target[index] = item
		else
			add(item)
		end
	end

	redef fun add(c)
	do
		target.add(c)
	end

	redef fun push(c)
	do
		target.add(c)
	end

	# Asks the underlying buffer to be able to hold at least `cap` bytes
	fun enlarge(cap: Int)
	do
		target.enlarge(cap)
	end

	redef fun append(s)
	do
		var s_length = s.length
		# Pre-allocation is only attempted when the whole capacity is below `s_length`
		if s_length > target.capacity then enlarge(s_length + target._length)
		for c in s do target.add c
	end

	redef fun iterator_from(pos)
	do
		return new FlatBufferCharIterator(target, pos)
	end

	redef fun reverse_iterator_from(pos)
	do
		return new FlatBufferCharReverseIterator(target, pos)
	end
end
1242
# Iterator over the characters of a `FlatBuffer`, from `curr_pos` up to the last one
private class FlatBufferCharIterator
	super IndexedIterator[Char]

	# Buffer being iterated
	var target: FlatBuffer

	# Index of the last character of `target`, computed at creation
	var max: Int is noautoinit

	# Index (in characters) of the current item
	var curr_pos: Int

	init
	do
		max = target._length - 1
	end

	redef fun is_ok do return curr_pos <= max

	redef fun index do return curr_pos

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun next
	do
		curr_pos += 1
	end
end
1263
1264 redef class NativeString
# The length is the one of the C string, as given by `cstring_length`
redef fun to_s do return to_s_with_length(cstring_length)
1269
# The first `length` bytes of `self` go through `clean_utf8`
redef fun to_s_with_length(length)
do
	assert length >= 0
	var cleaned = clean_utf8(length)
	return cleaned
end
1275
# `self` is used as is, without copy: `byte_length` bytes from byte 0, declared as `unilen` characters
redef fun to_s_full(byte_length, unilen)
do
	var res = new FlatString.full(self, byte_length, 0, unilen)
	return res
end
1279
# `self` is used as is, without copy; `len` defaults to `cstring_length`
redef fun to_s_unsafe(len)
do
	var byte_len = len or else cstring_length
	return new FlatString.with_infos(self, byte_len, 0)
end
1284
# The length is the one of the C string, as given by `cstring_length`
redef fun to_s_with_copy
do
	return to_s_with_copy_and_length(cstring_length)
end
1286
1287 # Get a `String` from `length` bytes at `self` copied into Nit memory
1288 fun to_s_with_copy_and_length(length: Int): String
1289 do
1290 var r = clean_utf8(length)
1291 if r.items != self then return r
1292 var new_self = new NativeString(length + 1)
1293 copy_to(new_self, length, 0, 0)
1294 var str = new FlatString.with_infos(new_self, length, 0)
1295 new_self[length] = 0u8
1296 return str
1297 end
1298
1299 # Cleans a NativeString if necessary
1300 fun clean_utf8(len: Int): FlatString do
1301 var replacements: nullable Array[Int] = null
1302 var end_length = len
1303 var pos = 0
1304 var chr_ln = 0
1305 var rem = len
1306 while rem > 0 do
1307 while rem >= 4 do
1308 var i = fetch_4_chars(pos)
1309 if i & 0x80808080 != 0 then break
1310 pos += 4
1311 chr_ln += 4
1312 rem -= 4
1313 end
1314 if rem == 0 then break
1315 var b = self[pos]
1316 if b & 0x80u8 == 0x00u8 then
1317 pos += 1
1318 chr_ln += 1
1319 rem -= 1
1320 continue
1321 end
1322 var nxst = length_of_char_at(pos)
1323 var ok_st: Bool
1324 if nxst == 1 then
1325 ok_st = b & 0x80u8 == 0u8
1326 else if nxst == 2 then
1327 ok_st = b & 0xE0u8 == 0xC0u8
1328 else if nxst == 3 then
1329 ok_st = b & 0xF0u8 == 0xE0u8
1330 else
1331 ok_st = b & 0xF8u8 == 0xF0u8
1332 end
1333 if not ok_st then
1334 if replacements == null then replacements = new Array[Int]
1335 replacements.add pos
1336 end_length += 2
1337 pos += 1
1338 rem -= 1
1339 chr_ln += 1
1340 continue
1341 end
1342 var ok_c: Bool
1343 var c = char_at(pos)
1344 var cp = c.code_point
1345 if nxst == 1 then
1346 ok_c = cp >= 0 and cp <= 0x7F
1347 else if nxst == 2 then
1348 ok_c = cp >= 0x80 and cp <= 0x7FF
1349 else if nxst == 3 then
1350 ok_c = cp >= 0x800 and cp <= 0xFFFF
1351 ok_c = ok_c and not (cp >= 0xD800 and cp <= 0xDFFF) and cp != 0xFFFE and cp != 0xFFFF
1352 else
1353 ok_c = cp >= 0x10000 and cp <= 0x10FFFF
1354 end
1355 if not ok_c then
1356 if replacements == null then replacements = new Array[Int]
1357 replacements.add pos
1358 end_length += 2
1359 pos += 1
1360 chr_ln += 1
1361 rem -= 1
1362 continue
1363 end
1364 var clen = c.u8char_len
1365 pos += clen
1366 rem -= clen
1367 chr_ln += 1
1368 end
1369 var ret = self
1370 if end_length != len then
1371 ret = new NativeString(end_length)
1372 var old_repl = 0
1373 var off = 0
1374 var repls = replacements.as(not null)
1375 var r = repls.items.as(not null)
1376 var imax = repls.length
1377 for i in [0 .. imax[ do
1378 var repl_pos = r[i]
1379 var chkln = repl_pos - old_repl
1380 copy_to(ret, chkln, old_repl, off)
1381 off += chkln
1382 ret[off] = 0xEFu8
1383 ret[off + 1] = 0xBFu8
1384 ret[off + 2] = 0xBDu8
1385 old_repl = repl_pos + 1
1386 off += 3
1387 end
1388 copy_to(ret, len - old_repl, old_repl, off)
1389 end
1390 return new FlatString.full(ret, end_length, 0, chr_ln)
1391 end
1392
1393 # Sets the next bytes at position `pos` to the value of `c`, encoded in UTF-8
1394 #
1395 # Very unsafe, make sure to have room for this char prior to calling this function.
1396 private fun set_char_at(pos: Int, c: Char) do
1397 var cp = c.code_point
1398 if cp < 128 then
1399 self[pos] = cp.to_b
1400 return
1401 end
1402 var ln = c.u8char_len
1403 if ln == 2 then
1404 self[pos] = (0xC0 | ((cp & 0x7C0) >> 6)).to_b
1405 self[pos + 1] = (0x80 | (cp & 0x3F)).to_b
1406 else if ln == 3 then
1407 self[pos] = (0xE0 | ((cp & 0xF000) >> 12)).to_b
1408 self[pos + 1] = (0x80 | ((cp & 0xFC0) >> 6)).to_b
1409 self[pos + 2] = (0x80 | (cp & 0x3F)).to_b
1410 else if ln == 4 then
1411 self[pos] = (0xF0 | ((cp & 0x1C0000) >> 18)).to_b
1412 self[pos + 1] = (0x80 | ((cp & 0x3F000) >> 12)).to_b
1413 self[pos + 2] = (0x80 | ((cp & 0xFC0) >> 6)).to_b
1414 self[pos + 3] = (0x80 | (cp & 0x3F)).to_b
1415 end
1416 end
1417 end
1418
redef class Int
	# Signed, base 10 textual representation of `self`
	#
	#     assert 1.to_s            == "1"
	#     assert (-123).to_s       == "-123"
	redef fun to_s
	do
		# Shortcut for the two most frequent values
		if self == 0 then return "0"
		if self == 1 then return "1"

		# Number of bytes of the representation, as given by `int_to_s_len`
		var digits = int_to_s_len
		# One more byte for the terminating zero
		var room = digits + 1
		var buf = new NativeString(room)
		buf[digits] = 0u8
		native_int_to_s(buf, room)
		# `digits` is passed as both the byte length and the character length
		return new FlatString.full(buf, digits, 0, digits)
	end
end
1436
redef class Array[E]

	# Fast implementation
	#
	# Concatenates the `to_s` of every non-null element into a single
	# `FlatString`, in two passes: the first one collects the strings and
	# sums their byte lengths, the second one copies their bytes into a
	# buffer of exactly that size.
	redef fun plain_to_s
	do
		var count = _length
		if count == 0 then return ""
		var elems = _items.as(not null)
		if count == 1 then
			var only = elems[0]
			if only == null then return ""
			return only.to_s
		end

		# Pass 1: stringify the non-null elements, packed at the start of `parts`
		var parts = new NativeArray[String](count)
		var total = 0
		var kept = 0
		var idx = 0
		while idx < count do
			var elem = elems[idx]
			if elem != null then
				var str = elem.to_s
				total += str.byte_length
				parts[kept] = str
				kept += 1
			end
			idx += 1
		end

		# Pass 2: copy the bytes of each part one after the other
		var dest = new NativeString(total + 1)
		dest[total] = 0u8
		var cursor = 0
		idx = 0
		while idx < kept do
			var part = parts[idx]
			if part isa FlatString then
				var plen = part._byte_length
				part._items.copy_to(dest, plen, part._first_byte, cursor)
				cursor += plen
			else
				# Non-flat text: copy its flat substrings in order
				for sub in part.substrings do
					var flat = sub.as(FlatString)
					var flen = flat._byte_length
					flat._items.copy_to(dest, flen, flat._first_byte, cursor)
					cursor += flen
				end
			end
			idx += 1
		end
		return new FlatString.with_infos(dest, total, 0)
	end
end
1486
redef class NativeArray[E]
	# Concatenation of all the strings of `self` into a single `FlatString`
	#
	# `self` must be a `NativeArray[String]`.
	redef fun native_to_s
	do
		assert self isa NativeArray[String]
		var parts = self
		var count = length

		# Pass 1: byte length of the result
		var total = 0
		var idx = 0
		while idx < count do
			total += parts[idx].byte_length
			idx += 1
		end

		# Pass 2: copy the bytes of each string one after the other
		var dest = new NativeString(total + 1)
		dest[total] = 0u8
		var cursor = 0
		idx = 0
		while idx < count do
			var part = parts[idx]
			if part isa FlatString then
				var plen = part._byte_length
				part._items.copy_to(dest, plen, part._first_byte, cursor)
				cursor += plen
			else
				# Non-flat text: copy its flat substrings in order
				for sub in part.substrings do
					var flat = sub.as(FlatString)
					var flen = flat._byte_length
					flat._items.copy_to(dest, flen, flat._first_byte, cursor)
					cursor += flen
				end
			end
			idx += 1
		end
		return new FlatString.with_infos(dest, total, 0)
	end
end
1523
redef class Map[K,V]
	# Concatenation of all the couples of `self`
	#
	# Each couple is written as key, `couple_sep`, value; two consecutive
	# couples are separated by `sep`. A null key or value is written `<null>`.
	# An empty map gives an empty string.
	redef fun join(sep, couple_sep)
	do
		if is_empty then return ""

		var out = new Buffer
		var it = iterator
		# The map is not empty, so the iterator starts on a valid couple
		loop
			var key = it.key
			var value = it.item
			out.append("{key or else "<null>"}{couple_sep}{value or else "<null>"}")
			it.next
			if not it.is_ok then break
			# Another couple follows
			out.append(sep)
		end
		return out.to_s
	end
end