lib/core: Added `to_s_unsafe` method to `NativeString` which does not clean a `Nativ...
[nit.git] / lib / core / text / flat.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # All the array-based text representations
12 module flat
13
14 intrude import abstract_text
15 intrude import native
16
17 `{
18 #include <stdio.h>
19 #include <string.h>
20 `}
21
# Iterator over the substrings of a flat text
#
# It yields the text given at construction exactly once.
private class FlatSubstringsIter
	super Iterator[FlatText]

	# Text still to be yielded, `null` once it has been consumed
	var tgt: nullable FlatText

	redef fun is_ok
	do
		return tgt != null
	end

	redef fun item
	do
		assert is_ok
		var current = tgt
		return current.as(not null)
	end

	redef fun next
	do
		tgt = null
	end
end
36
37 redef class FlatText
38
# Offset in `_items` of the first byte of `self`
#
# Always 0 at this level.
protected fun first_byte: Int
do
	return 0
end

# Offset in `_items` of the last byte of `self` (inclusive)
protected fun last_byte: Int
do
	var start = first_byte
	return start + _bytelen - 1
end

# Character index most recently resolved by `char_to_byte_index`
var position: Int = 0

# Byte offset in `_items` that goes with the cached `position`
var bytepos: Int = 0
50
# Byte offset in `_items` of the character at position `index`
#
# The lookup starts from whichever known anchor is closest to `index`:
# the beginning of the text, its last character, or the position cached
# by the previous call. The result is cached for the next call.
#
# `index` must be in `[0 .. length[`.
fun char_to_byte_index(index: Int): Int do
	var ln = length
	assert index >= 0
	assert index < ln

	var cached_pos = _position
	var native = _items

	# Distance (in characters) between `index` and each candidate anchor
	var from_start = index
	var from_end = (ln - 1) - index
	var from_cache = (cached_pos - index).abs

	var closest = from_start
	if from_cache < closest then closest = from_cache
	if from_end < closest then closest = from_end

	var anchor_byte: Int
	var anchor_char: Int
	if closest == from_start then
		anchor_byte = first_byte
		anchor_char = 0
	else if closest == from_cache then
		anchor_byte = _bytepos
		anchor_char = cached_pos
	else
		anchor_byte = native.find_beginning_of_char_at(last_byte)
		anchor_char = length - 1
	end

	var res = native.char_to_byte_index_cached(index, anchor_char, anchor_byte)

	# Remember this lookup for the next one
	_position = index
	_bytepos = res

	return res
end
89
# Number of extra bytes needed to HTML-escape `self`
#
# `<` and `>` count for 3 extra bytes each;
# `&`, `"`, `'` and `/` count for 4 extra bytes each.
fun chars_to_html_escape: Int do
	var native = _items
	var stop = last_byte
	var i = first_byte
	var extra = 0
	while i <= stop do
		var b = native[i]
		i += 1
		if b == 0x3Cu8 or b == 0x3Eu8 then
			extra += 3
		else if b == 0x26u8 or b == 0x22u8 or b == 0x27u8 or b == 0x2Fu8 then
			extra += 4
		end
	end
	return extra
end
115
# HTML-escaped version of `self`
#
# Replacements performed, byte per byte:
#
# * 0x3C (<) => &lt;
# * 0x3E (>) => &gt;
# * 0x26 (&) => &amp;
# * 0x22 (") => &#34;
# * 0x27 (') => &#39;
# * 0x2F (/) => &#47;
#
# When nothing has to be escaped, `to_s` is returned.
redef fun html_escape
do
	var extra = chars_to_html_escape
	if extra == 0 then return to_s
	var native = _items
	var stop = last_byte
	var src = first_byte
	var out_len = extra + _bytelen
	var out = new NativeString(out_len)
	var dst = 0
	while src <= stop do
		var b = native[src]
		src += 1
		if b == 0x3Cu8 or b == 0x3Eu8 then
			# `&lt;` or `&gt;`: only the second byte differs
			out[dst] = 0x26u8
			out[dst + 1] = if b == 0x3Cu8 then 0x6Cu8 else 0x67u8
			out[dst + 2] = 0x74u8
			out[dst + 3] = 0x3Bu8
			dst += 4
		else if b == 0x26u8 then
			# `&amp;`
			out[dst] = 0x26u8
			out[dst + 1] = 0x61u8
			out[dst + 2] = 0x6Du8
			out[dst + 3] = 0x70u8
			out[dst + 4] = 0x3Bu8
			dst += 5
		else if b == 0x22u8 or b == 0x27u8 or b == 0x2Fu8 then
			# Numeric entity `&#NN;`, defaults are the digits of 34 (`"`)
			var tens = 0x33u8
			var units = 0x34u8
			if b == 0x27u8 then
				units = 0x39u8
			else if b == 0x2Fu8 then
				tens = 0x34u8
				units = 0x37u8
			end
			out[dst] = 0x26u8
			out[dst + 1] = 0x23u8
			out[dst + 2] = tens
			out[dst + 3] = units
			out[dst + 4] = 0x3Bu8
			dst += 5
		else
			out[dst] = b
			dst += 1
		end
	end
	return new FlatString.with_infos(out, out_len, 0)
end
187
# Number of extra bytes needed to escape `self` to C
#
# A result of 0 means that `self` contains nothing to escape, which lets
# `escape_to_c` return it as-is.
fun chars_to_escape_to_c: Int do
	var native = _items
	var stop = last_byte
	var i = first_byte
	var extra = 0
	while i <= stop do
		var b = native[i]
		i += 1
		if b == 0x0Au8 or b == 0x09u8 or b == 0x22u8 or b == 0x27u8 or b == 0x5Cu8 then
			# Two-byte escape: one extra byte
			extra += 1
		else if b < 32u8 then
			# Four-byte octal escape: three extra bytes
			extra += 3
		end
	end
	return extra
end
217
# `self` escaped for use in a C string literal
#
# Bytes below 32 are written as a backslash followed by `0` and two octal
# digits, except:
#
# * 0x09 => \t
# * 0x0A => \n
#
# In addition:
#
# * 0x22 => \"
# * 0x27 => \'
# * 0x5C => \\
#
# When nothing has to be escaped, `to_s` is returned.
redef fun escape_to_c do
	var extra = chars_to_escape_to_c
	if extra == 0 then return self.to_s
	var native = _items
	var stop = last_byte
	var out_len = _bytelen + extra
	var out = new NativeString(out_len)
	var src = first_byte
	var dst = 0
	while src <= stop do
		var b = native[src]
		src += 1
		if b == 0x09u8 or b == 0x0Au8 or b == 0x22u8 or b == 0x27u8 or b == 0x5Cu8 then
			# Backslash followed by one byte: `t`, `n`, or the byte itself
			var second = b
			if b == 0x09u8 then
				second = 0x74u8
			else if b == 0x0Au8 then
				second = 0x6Eu8
			end
			out[dst] = 0x5Cu8
			out[dst + 1] = second
			dst += 2
		else if b < 32u8 then
			out[dst] = 0x5Cu8
			out[dst + 1] = 0x30u8
			out[dst + 2] = ((b & 0x38u8) >> 3) + 0x30u8
			out[dst + 3] = (b & 0x07u8) + 0x30u8
			dst += 4
		else
			out[dst] = b
			dst += 1
		end
	end
	return out.to_s_unsafe(out_len)
end
279
# Character at position `index`, counted in characters
redef fun [](index)
do
	var byte_index = char_to_byte_index(index)
	return _items.char_at(byte_index)
end
281
# Integer value of `self` read as hexadecimal digits
#
# `pos` defaults to 0 and `ln` to the number of characters after `pos`.
#
#     assert "ff".to_hex == 255
redef fun to_hex(pos, ln) do
	if pos == null then pos = 0
	if ln == null then ln = length - pos
	var native = _items
	var i = char_to_byte_index(pos)
	var stop = i + ln
	var res = 0
	while i < stop do
		res = (res << 4) + native[i].ascii.from_hex
		i += 1
	end
	return res
end
298 end
299
300 # Immutable strings of characters.
301 class FlatString
302 super FlatText
303 super String
304
# Offset in `_items` of the first byte of `self` (inclusive)
redef var first_byte is noinit

# Character view over `self`, built on first access
redef var chars = new FlatStringCharView(self) is lazy

# Byte view over `self`, built on first access
redef var bytes = new FlatStringByteView(self) is lazy

# Copy of the bytes of `self` followed by a 0 byte, computed on first access
redef var to_cstring is lazy do
	var size = _bytelen
	var copy = new NativeString(size + 1)
	copy[size] = 0u8
	_items.copy_to(copy, size, _first_byte, 0)
	return copy
end
319
# Copy of `self` with its characters in reverse order
#
#     assert "abc".reversed == "cba"
#     assert "".reversed == ""
redef fun reversed
do
	var b = new FlatBuffer.with_capacity(_bytelen + 1)
	# Walk from the last character down to the first one.
	# An explicit countdown replaces `[0 .. _length[.step(-1)`: a negative
	# step iterates downward from the first bound of the range (0 here),
	# so the characters were not visited in reverse order.
	var i = _length - 1
	while i >= 0 do
		b.add self[i]
		i -= 1
	end
	var s = b.to_s.as(FlatString)
	s._length = self._length
	return s
end
330
# Native view starting at the first byte of `self`, obtained from `_items.fast_cstring`
redef fun fast_cstring
do
	var start = _first_byte
	return _items.fast_cstring(start)
end
332
# Substring of `count` characters starting at character `from`
#
# A negative `from` is clamped to 0 (and `count` reduced accordingly);
# a range running past the end is truncated. The result shares `_items`
# with `self`, no byte is copied.
#
#     assert "hello".substring(1, 3) == "ell"
#     assert "abc".substring(-1, 2) == "a"
#     assert "abc".substring(5, 2) == ""
redef fun substring(from, count)
do
	assert count >= 0

	if from < 0 then
		count += from
		if count < 0 then count = 0
		from = 0
	end

	if (count + from) > length then count = length - from
	if count <= 0 then return ""
	var end_index = from + count - 1

	var bytefrom = char_to_byte_index(from)
	var byteto = char_to_byte_index(end_index)
	var its = _items
	# `byteto` is the first byte of the last character: extend it to its last byte
	byteto += its.length_of_char_at(byteto) - 1

	# `FlatString.full` expects (items, bytelen, from, length);
	# the end offset is not one of its arguments.
	var s = new FlatString.full(its, byteto - bytefrom + 1, bytefrom, count)
	return s
end
355
# The empty string, as a `FlatString`
redef fun empty
do
	var res = ""
	return res.as(FlatString)
end
357
# Copy of `self` where each character is replaced by its `to_upper` value
redef fun to_upper
do
	var res = new FlatBuffer.with_capacity(self._bytelen + 1)
	var n = _length
	for i in [0 .. n[ do
		res.add(chars[i].to_upper)
	end
	return res.to_s
end
372
# Copy of `self` where each character is replaced by its `to_lower` value
redef fun to_lower
do
	var res = new FlatBuffer.with_capacity(self._bytelen + 1)
	var n = _length
	for i in [0 .. n[ do
		res.add(chars[i].to_lower)
	end
	return res.to_s
end
387
# Calls `output` on each character of `self`, in order
redef fun output
do
	var it = chars.iterator
	while it.is_ok do
		it.item.output
		it.next
	end
end
392
393 ##################################################
394 # String Specific Methods #
395 ##################################################
396
# Low-level creation of a string from its bytes only.
#
# `items` is used as is, without copy: aliasing issues are the
# responsibility of the caller.
# The length in characters is computed from the bytes.
private init with_infos(items: NativeString, bytelen, from: Int)
do
	_items = items
	_bytelen = bytelen
	_first_byte = from
	_bytepos = from
	# `last_byte` relies on `_first_byte` and `_bytelen`, both set above
	_length = _items.utf8_length(_first_byte, last_byte)
end
409
# Low-level creation of a string when every piece of data is already known.
#
# `items` is used as is, without copy: aliasing issues are the
# responsibility of the caller.
# `length` (in characters) is trusted, it is not checked against the bytes.
private init full(items: NativeString, bytelen, from, length: Int)
do
	_items = items
	_bytelen = bytelen
	_length = length
	_first_byte = from
	_bytepos = from
end
422
# Byte-wise equality with another flat text
#
# Falls back to `super` when `other` is not a `FlatText`.
redef fun ==(other)
do
	if not other isa FlatText then return super

	if self.object_id == other.object_id then return true

	var nbytes = _bytelen
	if other._bytelen != nbytes then return false

	var mine = _items
	var theirs = other._items
	var my_start = _first_byte
	var its_start = other.first_byte

	for k in [0 .. nbytes[ do
		if mine[my_start + k] != theirs[its_start + k] then return false
	end

	return true
end
449
# Byte-wise lexicographic comparison with another flat text
#
# When one text is a prefix of the other, the shorter (in bytes) is the lesser.
# Falls back to `super` when `other` is not a `FlatText`.
redef fun <(other)
do
	if not other isa FlatText then return super

	if self.object_id == other.object_id then return false

	var mine = _items
	var theirs = other._items

	var my_len = _bytelen
	var its_len = other.bytelen
	var common = my_len
	if its_len < common then common = its_len

	var i = _first_byte
	var j = other.first_byte
	var stop = i + common

	while i < stop do
		var a = mine[i]
		var b = theirs[j]
		# The first differing byte decides
		if a > b then return false
		if a < b then return true
		i += 1
		j += 1
	end

	return my_len < its_len
end
479
# Concatenation of `self` and `o.to_s` in a freshly allocated string
#
# Aborts when `o.to_s` is not a `FlatText`.
redef fun +(o) do
	var s = o.to_s
	var its_bytes = s.bytelen
	var my_bytes = _bytelen
	var total = my_bytes + its_bytes
	var my_items = _items
	var my_start = _first_byte
	if s isa FlatText then
		var res = new NativeString(total + 1)
		my_items.copy_to(res, my_bytes, my_start, 0)
		s._items.copy_to(res, its_bytes, s.first_byte, my_bytes)
		var nchars = _length + o.length
		return new FlatString.full(res, total, 0, nchars)
	else
		abort
	end
end
498
# `self` repeated `i` times, in a freshly allocated string
redef fun *(i) do
	var unit_bytes = _bytelen
	var total_bytes = unit_bytes * i
	var total_chars = _length * i
	var src = _items
	var start = _first_byte
	var res = new NativeString(total_bytes + 1)
	res[total_bytes] = 0u8
	var off = 0
	var left = i
	while left > 0 do
		src.copy_to(res, unit_bytes, start, off)
		off += unit_bytes
		left -= 1
	end
	return new FlatString.full(res, total_bytes, 0, total_chars)
end
516
517
# Hash of the bytes of `self` (djb2 algorithm), cached in `hash_cache`
redef fun hash
do
	var cached = hash_cache
	if cached != null then return cached

	var h = 5381
	var native = _items
	var stop = last_byte
	var i = _first_byte
	while i <= stop do
		h = (h << 5) + h + native[i].to_i
		i += 1
	end

	hash_cache = h
	return h
end
538
# Iterator that yields `self` as its only substring
redef fun substrings
do
	return new FlatSubstringsIter(self)
end
540 end
541
# Iterator over the characters of a `FlatString`, going down to index 0
private class FlatStringCharReverseIterator
	super IndexedIterator[Char]

	# String being traversed
	var target: FlatString

	# Index of the current character
	var curr_pos: Int

	redef fun index
	do
		return curr_pos
	end

	redef fun is_ok
	do
		return curr_pos >= 0
	end

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
558
# Iterator over the characters of a `FlatString`, going up to its last character
private class FlatStringCharIterator
	super IndexedIterator[Char]

	# String being traversed
	var target: FlatString

	# Index of the last character of `target`, computed at construction
	var max: Int is noautoinit

	# Index of the current character
	var curr_pos: Int

	init
	do
		max = target._length - 1
	end

	redef fun index
	do
		return curr_pos
	end

	redef fun is_ok
	do
		return curr_pos <= max
	end

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun next
	do
		curr_pos += 1
	end
end
579
# View of a `FlatString` as a sequence of characters
private class FlatStringCharView
	super StringCharView

	redef type SELFTYPE: FlatString

	redef fun [](index)
	do
		return target[index]
	end

	redef fun iterator_from(start)
	do
		return new FlatStringCharIterator(target, start)
	end

	redef fun reverse_iterator_from(start)
	do
		return new FlatStringCharReverseIterator(target, start)
	end
end
592
# Iterator over the bytes of a `FlatString`, going down to its first byte
private class FlatStringByteReverseIterator
	super IndexedIterator[Byte]

	# String being traversed
	var target: FlatString

	# Bytes of `target`, fetched once at construction
	var target_items: NativeString is noautoinit

	# Current position; given relative to the string, then stored as an offset in `target_items`
	var curr_pos: Int

	init
	do
		var str = target
		target_items = str._items
		# Translate the string-relative position into an offset in the native array
		curr_pos += str._first_byte
	end

	redef fun index
	do
		return curr_pos - target._first_byte
	end

	redef fun is_ok
	do
		return curr_pos >= target._first_byte
	end

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
618
# Iterator over the bytes of a `FlatString`, going up to its last byte
private class FlatStringByteIterator
	super IndexedIterator[Byte]

	# String being traversed
	var target: FlatString

	# Bytes of `target`, fetched once at construction
	var target_items: NativeString is noautoinit

	# Current position; given relative to the string, then stored as an offset in `target_items`
	var curr_pos: Int

	init
	do
		var str = target
		target_items = str._items
		# Translate the string-relative position into an offset in the native array
		curr_pos += str._first_byte
	end

	redef fun index
	do
		return curr_pos - target._first_byte
	end

	redef fun is_ok
	do
		return curr_pos <= target.last_byte
	end

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun next
	do
		curr_pos += 1
	end
end
644
# View of a `FlatString` as a sequence of bytes
private class FlatStringByteView
	super StringByteView

	redef type SELFTYPE: FlatString

	# Byte at position `index`, relative to the first byte of the string
	#
	# `index` must be in `[0 .. bytelen[`.
	redef fun [](index)
	do
		assert index >= 0
		var str = self.target
		var offset = index + str._first_byte
		# The offset must not run past the last byte of the string
		assert offset <= str.last_byte
		return str._items[offset]
	end

	redef fun iterator_from(start)
	do
		return new FlatStringByteIterator(target, start)
	end

	redef fun reverse_iterator_from(start)
	do
		return new FlatStringByteReverseIterator(target, start)
	end
end
666
redef class Buffer
	# New empty buffer, implemented by a `FlatBuffer`
	redef new
	do
		return new FlatBuffer
	end

	# New empty buffer with capacity `i`, implemented by a `FlatBuffer`
	redef new with_cap(i)
	do
		return new FlatBuffer.with_capacity(i)
	end
end
672
673 # Mutable strings of characters.
674 class FlatBuffer
675 super FlatText
676 super Buffer
677
# Character view over `self`, built on first access
redef var chars: Sequence[Char] = new FlatBufferCharView(self) is lazy

# Byte view over `self`, built on first access
redef var bytes = new FlatBufferByteView(self) is lazy

private var char_cache: Int = -1

private var byte_cache: Int = -1

# Number of bytes the buffer is considered able to hold before growing
private var capacity = 0

# Result of the last `to_cstring`, reused while the buffer is not dirty
private var real_items: NativeString is noinit

# Native view starting at the first byte of `_items`
redef fun fast_cstring
do
	return _items.fast_cstring(0)
end

# Iterator that yields `self` as its only substring
redef fun substrings
do
	return new FlatSubstringsIter(self)
end
694
# Replaces `_items` by a private copy of its content
#
# Used before a modification when the Copy-On-Write flag `written` is true;
# the flag is cleared afterwards.
private fun reset do
	var fresh = new NativeString(capacity)
	var used = _bytelen
	if used != 0 then _items.copy_to(fresh, used, 0, 0)
	_items = fresh
	written = false
end
705
# Shifts the content of the buffer by `len` bytes to the right, starting at byte `from`
#
# When the shifted content does not fit in `capacity`, a larger
# `NativeString` is allocated, the bytes before `from` are copied into it
# and it becomes the new `_items`.
#
# Internal only, does not modify _bytelen or length, this is the caller's responsibility
private fun rshift_bytes(from: Int, len: Int) do
	var oit = _items
	var nit = oit
	var bt = _bytelen
	var needed = bt + len
	if needed > capacity then
		# Grow until the shifted content fits
		var ncap = capacity
		while ncap < needed do ncap = ncap * 2 + 2
		capacity = ncap
		nit = new NativeString(ncap)
		# The `from` first bytes do not move: copy them at the same offsets
		oit.copy_to(nit, from, 0, 0)
		# Install the new array, otherwise the shifted bytes would be lost
		_items = nit
	end
	# Move the tail `len` bytes further
	oit.copy_to(nit, bt - from, from, from + len)
end

# Shifts the content of the buffer by `len` bytes to the left, starting at `from`
#
# Internal only, does not modify _bytelen or length, this is the caller's responsibility
private fun lshift_bytes(from: Int, len: Int) do
	var it = _items
	it.copy_to(it, _bytelen - from, from, from - len)
end

# Replaces the character at `index` by `item`
#
# `index == length` is accepted and appends `item`.
# When the UTF-8 length of the new character differs from the old one,
# the following bytes are shifted accordingly.
redef fun []=(index, item)
do
	assert index >= 0 and index <= _length
	if written then reset
	is_dirty = true
	if index == _length then
		add item
		return
	end
	var it = _items
	var ip = it.char_to_byte_index(index)
	var c = it.char_at(ip)
	var clen = c.u8char_len
	var itemlen = item.u8char_len
	var size_diff = itemlen - clen
	if size_diff > 0 then
		rshift_bytes(ip + clen, size_diff)
	else if size_diff < 0 then
		lshift_bytes(ip + clen, -size_diff)
	end
	_bytelen += size_diff
	bytepos += size_diff
	# `rshift_bytes` may have replaced `_items`: write into the current array
	_items.set_char_at(ip, item)
end
753
# Appends the character `c` at the end of the buffer
redef fun add(c)
do
	if written then reset
	is_dirty = true
	var at = _bytelen
	var width = c.u8char_len
	# Make room for the encoded character before writing it
	enlarge(at + width)
	_items.set_char_at(at, c)
	_bytelen = at + width
	_length += 1
end
765
# Empties the buffer
redef fun clear do
	is_dirty = true
	# `reset` copies according to `_bytelen`, so it runs before the lengths are zeroed
	if written then reset
	_length = 0
	_bytelen = 0
end
772
# A new empty buffer
redef fun empty
do
	return new Buffer
end
774
# Makes sure the buffer can hold `cap` bytes
#
# Does nothing when `cap` does not exceed the current capacity.
# Otherwise the capacity grows by repeated `* 2 + 2` steps and the
# content is copied into a new `NativeString`.
redef fun enlarge(cap)
do
	var ncap = capacity
	if cap <= ncap then return
	while ncap <= cap do ncap = ncap * 2 + 2
	# The content is copied below, so the COW flag can be cleared
	written = false
	var grown = new NativeString(ncap + 1)
	var used = _bytelen
	if used > 0 then _items.copy_to(grown, used, 0, 0)
	_items = grown
	capacity = ncap
end
792
# String with the current content of the buffer
#
# The string shares `_items` with the buffer; `written` is set so that
# the next modification of the buffer copies the bytes first.
redef fun to_s
do
	written = true
	var used = _bytelen
	if used == 0 then _items = new NativeString(1)
	var nchars = _length
	return new FlatString.full(_items, used, 0, nchars)
end
800
# Copy of the bytes of the buffer followed by a 0 byte
#
# The copy is kept in `real_items` and reused until the buffer is modified.
redef fun to_cstring
do
	if not is_dirty then return real_items
	var used = _bytelen
	var fresh = new NativeString(used + 1)
	fresh[used] = 0u8
	if _length > 0 then _items.copy_to(fresh, used, 0, 0)
	real_items = fresh
	is_dirty = false
	return real_items
end
813
# Create a new empty buffer.
init
do
end

# Low-level creation of a buffer from already known data.
#
# `items` is used as is, without copy, to store the characters of the buffer.
# Aliasing issues are the responsibility of the caller.
#
# If `items` is shared, `written` should be set to true after the creation
# so that a modification will do a copy-on-write.
private init with_infos(items: NativeString, capacity, bytelen, length: Int)
do
	_items = items
	_bytelen = bytelen
	_length = length
	self.capacity = capacity
end
831
# Create a new buffer with the content of `s`.
#
# When `s` is a flat text starting at byte 0 of its `NativeString`, the
# bytes are shared and `written` is set so that the first modification
# copies them. In every other case the substrings of `s` are copied one
# after the other into a fresh `NativeString`.
init from(s: Text)
do
	var blen = s.bytelen
	var shared = false
	if s isa FlatText and s.first_byte == 0 then
		# The buffer addresses its bytes from offset 0, sharing is only valid here
		_items = s._items
		shared = true
	else
		var its = new NativeString(blen)
		var off = 0
		for sub in s.substrings do
			var sublen = sub._bytelen
			# Honor the offset of each substring and append it after the previous one
			sub._items.copy_to(its, sublen, sub.first_byte, off)
			off += sublen
		end
		_items = its
	end
	_bytelen = blen
	_length = s.length
	_capacity = blen
	written = shared
end
846
# Create a new empty buffer able to hold `cap` bytes.
#
# `cap` must not be negative.
init with_capacity(cap: Int)
do
	assert cap >= 0
	_bytelen = 0
	capacity = cap
	_items = new NativeString(cap + 1)
end
855
# Appends the content of `s` at the end of the buffer
#
# A text that is not flat is appended substring by substring.
redef fun append(s)
do
	if s.is_empty then return
	is_dirty = true
	var added = s.bytelen
	var total = _bytelen + added
	enlarge(total)
	if not s isa FlatText then
		for sub in s.substrings do append sub
		return
	end
	s._items.copy_to(_items, added, s.first_byte, _bytelen)
	_bytelen = total
	_length += s.length
end
872
# Copies `len` characters of `self`, starting at `start`, into `dest` starting at `new_start`
fun copy(start: Int, len: Int, dest: Buffer, new_start: Int)
do
	var src_chars = self.chars
	var dst_chars = dest.chars
	var i = 0
	while i < len do
		dst_chars[new_start + i] = src_chars[start + i]
		i += 1
	end
end
882
# New buffer holding a copy of `count` characters of `self` starting at `from`
#
# A negative `from` is replaced by 0 and a range running past the end is
# truncated; an empty range gives a new empty buffer.
redef fun substring(from, count)
do
	assert count >= 0
	if from < 0 then from = 0
	if (from + count) > _length then count = _length - from
	if count <= 0 then return new Buffer
	var native = _items
	var first = native.char_to_byte_index(from)
	var last = native.char_to_byte_index(count + from - 1)
	# `last` is the first byte of the last character: extend it to its last byte
	last += native.char_at(last).u8char_len - 1
	var nbytes = last - first + 1
	var copy = new NativeString(nbytes)
	native.copy_to(copy, nbytes, first, 0)
	return new FlatBuffer.with_infos(copy, nbytes, nbytes, count)
end
898
# Reverses the characters of the buffer, in place
redef fun reverse
do
	written = false
	var rev = new FlatBuffer.with_capacity(capacity)
	var it = chars.reverse_iterator
	while it.is_ok do
		rev.add it.item
		it.next
	end
	# Adopt the bytes of the temporary buffer
	_items = rev._items
end
906
# Repeats the current content so that it appears `repeats` times in the buffer
redef fun times(repeats)
do
	# Snapshot of the current content, appended `repeats - 1` times
	var snapshot = new FlatString.full(_items, _bytelen, 0, _length)
	var n = 1
	while n < repeats do
		append(snapshot)
		n += 1
	end
end
915
# Replaces, in place, each character by its `to_upper` value
redef fun upper
do
	if written then reset
	var n = _length
	var i = 0
	while i < n do
		self[i] = self[i].to_upper
		i += 1
	end
end
921
# Replaces, in place, each character by its `to_lower` value
redef fun lower
do
	if written then reset
	var n = _length
	var i = 0
	while i < n do
		self[i] = self[i].to_lower
		i += 1
	end
end
927 end
928
# Iterator over the bytes of a `FlatBuffer`, going down to byte 0
private class FlatBufferByteReverseIterator
	super IndexedIterator[Byte]

	# Buffer being traversed
	var target: FlatBuffer

	# Bytes of `target`, fetched once at construction
	var target_items: NativeString is noautoinit

	# Offset of the current byte
	var curr_pos: Int

	init
	do
		target_items = target._items
	end

	redef fun is_ok
	do
		return curr_pos >= 0
	end

	redef fun index
	do
		return curr_pos
	end

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
949
# View of a `FlatBuffer` as a sequence of bytes
private class FlatBufferByteView
	super BufferByteView

	redef type SELFTYPE: FlatBuffer

	redef fun [](index)
	do
		return target._items[index]
	end

	redef fun iterator_from(pos)
	do
		return new FlatBufferByteIterator(target, pos)
	end

	redef fun reverse_iterator_from(pos)
	do
		return new FlatBufferByteReverseIterator(target, pos)
	end
end
962
# Iterator over the bytes of a `FlatBuffer`, going up to its last used byte
private class FlatBufferByteIterator
	super IndexedIterator[Byte]

	# Buffer being traversed
	var target: FlatBuffer

	# Bytes of `target`, fetched once at construction
	var target_items: NativeString is noautoinit

	# Offset of the current byte
	var curr_pos: Int

	init
	do
		target_items = target._items
	end

	redef fun is_ok
	do
		return curr_pos < target._bytelen
	end

	redef fun index
	do
		return curr_pos
	end

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun next
	do
		curr_pos += 1
	end
end
983
# Iterator over the characters of a `FlatBuffer`, going down to index 0
private class FlatBufferCharReverseIterator
	super IndexedIterator[Char]

	# Buffer being traversed
	var target: FlatBuffer

	# Index of the current character
	var curr_pos: Int

	redef fun is_ok
	do
		return curr_pos >= 0
	end

	redef fun index
	do
		return curr_pos
	end

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
1000
# View of a `FlatBuffer` as a mutable sequence of characters
private class FlatBufferCharView
	super BufferCharView

	redef type SELFTYPE: FlatBuffer

	redef fun [](index)
	do
		return target[index]
	end

	# Replaces the character at `index`; `index == length` appends `item`
	redef fun []=(index, item)
	do
		assert index >= 0 and index <= length
		if index == length then
			add(item)
		else
			target[index] = item
		end
	end

	redef fun push(c)
	do
		target.add(c)
	end

	redef fun add(c)
	do
		target.add(c)
	end

	# Forwards to `FlatBuffer::enlarge` on the underlying buffer
	fun enlarge(cap: Int)
	do
		target.enlarge(cap)
	end

	# Adds every character of `s` at the end of the underlying buffer
	redef fun append(s)
	do
		var s_length = s.length
		if target.capacity < s.length then enlarge(s_length + target._length)
		var it = s.iterator
		while it.is_ok do
			target.add it.item
			it.next
		end
	end

	redef fun iterator_from(pos)
	do
		return new FlatBufferCharIterator(target, pos)
	end

	redef fun reverse_iterator_from(pos)
	do
		return new FlatBufferCharReverseIterator(target, pos)
	end
end
1045
# Iterator over the characters of a `FlatBuffer`, going up to its last character
private class FlatBufferCharIterator
	super IndexedIterator[Char]

	# Buffer being traversed
	var target: FlatBuffer

	# Index of the last character of `target`, computed at construction
	var max: Int is noautoinit

	# Index of the current character
	var curr_pos: Int

	init
	do
		max = target._length - 1
	end

	redef fun is_ok
	do
		return curr_pos <= max
	end

	redef fun index
	do
		return curr_pos
	end

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun next
	do
		curr_pos += 1
	end
end
1066
1067 redef class NativeString
# `self` as a String, its length being given by `cstring_length`
redef fun to_s
do
	var len = cstring_length
	return to_s_with_length(len)
end
1072
# `self` as a String built from its `length` first bytes, passed through `clean_utf8`
#
# `length` must not be negative.
redef fun to_s_with_length(length): FlatString
do
	assert length >= 0
	var res = clean_utf8(length)
	return res
end
1079
# `self` as a String of `bytelen` bytes and `unilen` characters
#
# Both lengths are trusted; the bytes are neither copied nor checked.
redef fun to_s_full(bytelen, unilen)
do
	var res = new FlatString.full(self, bytelen, 0, unilen)
	return res
end
1083
# `self` as a String of `len` bytes, without passing through `clean_utf8`
#
# When `len` is null, `cstring_length` is used.
# The bytes are used as is, without copy.
redef fun to_s_unsafe(len)
do
	var nbytes = len or else cstring_length
	return new FlatString.with_infos(self, nbytes, 0)
end
1088
# `self` as a String that does not share its bytes with `self`
#
# When `clean_utf8` already produced a new `NativeString`, its result is
# returned directly; otherwise the bytes are copied.
redef fun to_s_with_copy: FlatString
do
	var nbytes = cstring_length
	var cleaned = clean_utf8(nbytes)
	# A different native array means that cleaning already made a copy
	if cleaned.items != self then return cleaned
	var copy = new NativeString(nbytes + 1)
	copy_to(copy, nbytes, 0, 0)
	var res = new FlatString.with_infos(copy, nbytes, 0)
	copy[nbytes] = 0u8
	# The copy ends with a 0 byte: it can serve as the C string of the result
	res.to_cstring = copy
	return res
end
1102
# String made of the `len` first bytes of `self`, with invalid UTF-8 replaced
#
# Each byte that does not start a valid sequence is replaced by the
# 3 bytes `EF BF BD`. When nothing has to be replaced, the returned
# string uses `self` directly, without copy.
fun clean_utf8(len: Int): FlatString do
	var bad_offsets: nullable Array[Int] = null
	var out_len = len
	var i = 0
	var nchars = 0
	var left = len
	while left > 0 do
		# Fast path: skip 4 bytes at a time while none has its high bit set
		while left >= 4 do
			var word = fetch_4_chars(i)
			if word & 0x80808080 != 0 then break
			i += 4
			nchars += 4
			left -= 4
		end
		if left == 0 then break
		var lead = self[i]
		if lead & 0x80u8 == 0x00u8 then
			i += 1
			nchars += 1
			left -= 1
			continue
		end
		# Does the leading byte match the announced sequence length?
		var seq_len = length_of_char_at(i)
		var lead_ok: Bool
		if seq_len == 1 then
			lead_ok = lead & 0x80u8 == 0u8
		else if seq_len == 2 then
			lead_ok = lead & 0xE0u8 == 0xC0u8
		else if seq_len == 3 then
			lead_ok = lead & 0xF0u8 == 0xE0u8
		else
			lead_ok = lead & 0xF8u8 == 0xF0u8
		end
		# Number of bytes to skip; stays at 1 for a byte that gets replaced
		var step = 1
		var valid = false
		if lead_ok then
			# Is the decoded code point in the range allowed for that length?
			var c = char_at(i)
			var cp = c.code_point
			var cp_ok: Bool
			if seq_len == 1 then
				cp_ok = cp >= 0 and cp <= 0x7F
			else if seq_len == 2 then
				cp_ok = cp >= 0x80 and cp <= 0x7FF
			else if seq_len == 3 then
				cp_ok = cp >= 0x800 and cp <= 0xFFFF
				cp_ok = cp_ok and not (cp >= 0xD800 and cp <= 0xDFFF) and cp != 0xFFFE and cp != 0xFFFF
			else
				cp_ok = cp >= 0x10000 and cp <= 0x10FFFF
			end
			if cp_ok then
				valid = true
				step = c.u8char_len
			end
		end
		if not valid then
			# One byte will be replaced by three: two more bytes are needed
			if bad_offsets == null then bad_offsets = new Array[Int]
			bad_offsets.add i
			out_len += 2
		end
		i += step
		left -= step
		nchars += 1
	end
	var ret = self
	if out_len != len then
		ret = new NativeString(out_len)
		var src = 0
		var dst = 0
		var repls = bad_offsets.as(not null)
		var r = repls.items.as(not null)
		var imax = repls.length
		for k in [0 .. imax[ do
			var bad = r[k]
			# Copy the valid bytes located before the invalid one
			var chunk = bad - src
			copy_to(ret, chunk, src, dst)
			dst += chunk
			ret[dst] = 0xEFu8
			ret[dst + 1] = 0xBFu8
			ret[dst + 2] = 0xBDu8
			src = bad + 1
			dst += 3
		end
		# Copy what follows the last invalid byte
		copy_to(ret, len - src, src, dst)
	end
	return new FlatString.full(ret, out_len, 0, nchars)
end
1196
# Writes `c`, encoded in UTF-8, into the bytes starting at position `pos`
#
# Very unsafe: the caller must make sure there is room for the encoded character.
private fun set_char_at(pos: Int, c: Char)
do
	var width = c.u8char_len
	native_set_char(pos, c, width)
end
1204
# Writes the `ln` bytes of the UTF-8 encoding of `c` starting at position `pos`
#
# Nothing is written when `ln` is not between 1 and 4.
private fun native_set_char(pos: Int, c: Char, ln: Int) `{
	char* dst = self + pos;
	if (ln == 1) {
		dst[0] = c;
	} else if (ln == 2) {
		dst[0] = 0xC0 | ((c & 0x7C0) >> 6);
		dst[1] = 0x80 | (c & 0x3F);
	} else if (ln == 3) {
		dst[0] = 0xE0 | ((c & 0xF000) >> 12);
		dst[1] = 0x80 | ((c & 0xFC0) >> 6);
		dst[2] = 0x80 | (c & 0x3F);
	} else if (ln == 4) {
		dst[0] = 0xF0 | ((c & 0x1C0000) >> 18);
		dst[1] = 0x80 | ((c & 0x3F000) >> 12);
		dst[2] = 0x80 | ((c & 0xFC0) >> 6);
		dst[3] = 0x80 | (c & 0x3F);
	}
`}
1228 end
1229
redef class Int
	# Representation of `self` in the given `base`
	#
	# A buffer of `digit_count(base)` spaces is filled by `fill_buffer`.
	redef fun to_base(base, signed)
	do
		var ndigits = digit_count(base)
		var buf = new FlatBuffer.from(" " * ndigits)
		fill_buffer(buf, base, signed)
		return buf.to_s
	end

	# Displayable representation of `self`, signed and in base 10
	#
	#     assert 1.to_s == "1"
	#     assert (-123).to_s == "-123"
	redef fun to_s
	do
		# Shortcuts for the most common values
		if self == 0 then return "0"
		if self == 1 then return "1"

		var len = int_to_s_len
		var native = new NativeString(len + 1)
		native[len] = 0u8
		native_int_to_s(native, len + 1)
		return new FlatString.full(native, len, 0, len)
	end
end
1255
redef class Array[E]

	# Concatenation of the `to_s` of each non-null element, without separator
	#
	# The total size in bytes is computed first so that the result is
	# built in a single allocation.
	redef fun plain_to_s
	do
		var n = _length
		if n == 0 then return ""
		var elems = _items.as(not null)
		var first = elems[0]
		if n == 1 then
			if first == null then return ""
			return first.to_s
		end

		# First pass: convert each non-null element and add up the byte lengths
		var parts = new NativeArray[String](n)
		var nparts = 0
		var total = 0
		for i in [0 .. n[ do
			var e = elems[i]
			if e == null then continue
			var str = e.to_s
			total += str.bytelen
			parts[nparts] = str
			nparts += 1
		end

		# Second pass: copy the bytes of each part one after the other
		var native = new NativeString(total + 1)
		native[total] = 0u8
		var off = 0
		for i in [0 .. nparts[ do
			var part = parts[i]
			if part isa FlatString then
				var plen = part._bytelen
				part._items.copy_to(native, plen, part._first_byte, off)
				off += plen
			else
				for sub in part.substrings do
					var flat = sub.as(FlatString)
					var flen = flat._bytelen
					flat._items.copy_to(native, flen, flat._first_byte, off)
					off += flen
				end
			end
		end
		return new FlatString.with_infos(native, total, 0)
	end
end
1305
redef class NativeArray[E]
	# Concatenation of the strings of `self`, without separator
	#
	# `self` must be a `NativeArray[String]`.
	redef fun native_to_s do
		assert self isa NativeArray[String]
		var n = length
		var parts = self

		# First pass: add up the byte lengths
		var total = 0
		for i in [0 .. n[ do
			total += parts[i].bytelen
		end

		# Second pass: copy the bytes of each string one after the other
		var native = new NativeString(total + 1)
		native[total] = 0u8
		var off = 0
		for i in [0 .. n[ do
			var part = parts[i]
			if part isa FlatString then
				var plen = part._bytelen
				part._items.copy_to(native, plen, part._first_byte, off)
				off += plen
			else
				for sub in part.substrings do
					var flat = sub.as(FlatString)
					var flen = flat._bytelen
					flat._items.copy_to(native, flen, flat._first_byte, off)
					off += flen
				end
			end
		end
		return new FlatString.with_infos(native, total, 0)
	end
end
1342
redef class Map[K,V]
	# Entries of `self` joined in a single string
	#
	# Each entry is written as key, `couple_sep`, value; entries are
	# separated by `sep`. A null key or value is written `<null>`.
	redef fun join(sep, couple_sep)
	do
		if is_empty then return ""

		var res = new Buffer
		var it = iterator
		var first = true
		while it.is_ok do
			# The separator goes between entries, not before the first one
			if first then
				first = false
			else
				res.append(sep)
			end
			var k = it.key
			var e = it.item
			res.append("{k or else "<null>"}{couple_sep}{e or else "<null>"}")
			it.next
		end
		return res.to_s
	end
end