lib/core: Added an optimized `to_hex` function to `FlatText`
[nit.git] / lib / core / text / flat.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # All the array-based text representations
12 module flat
13
14 intrude import abstract_text
15 intrude import native
16
17 `{
18 #include <stdio.h>
19 #include <string.h>
20 `}
21
# Iterator over the substrings of a flat text.
#
# It yields `tgt` exactly once: `next` drops the reference,
# which ends the iteration.
private class FlatSubstringsIter
	super Iterator[FlatText]

	# The text still to be yielded, `null` once consumed
	var tgt: nullable FlatText

	redef fun is_ok do return tgt != null

	redef fun item
	do
		var current = tgt
		assert current != null
		return current
	end

	redef fun next
	do
		tgt = null
	end
end
36
37 redef class FlatText
38
# First byte of the NativeString (0 here, `FlatString` redefines it)
protected fun first_byte: Int
do
	return 0
end

# Last byte of the NativeString, derived from the byte length
protected fun last_byte: Int
do
	return _bytelen - 1
end

# Cache of the latest position (in chars) explored in the string
var position: Int = 0

# Cached position (in bytes) in the NativeString underlying the String
var bytepos: Int = 0
50
# Index of the character `index` in `_items`
#
# The lookup starts from whichever known anchor is the closest to `index`:
# the beginning of the text, the cached position or the end of the text.
# The result is stored in the cache (`_position`, `_bytepos`).
fun char_to_byte_index(index: Int): Int do
	var char_count = length
	assert index >= 0
	assert index < char_count

	var native = _items
	var cached_char = _position

	# Distance, in chars, between `index` and each anchor
	var from_start = index
	var from_cache = (cached_char - index).abs
	var from_end = (char_count - 1) - index

	var best = from_start
	if from_cache < best then best = from_cache
	if from_end < best then best = from_end

	var anchor_char: Int
	var anchor_byte: Int
	if best == from_start then
		anchor_char = 0
		anchor_byte = first_byte
	else if best == from_cache then
		anchor_char = cached_char
		anchor_byte = _bytepos
	else
		anchor_char = char_count - 1
		anchor_byte = native.find_beginning_of_char_at(last_byte)
	end

	var byte_index = native.char_to_byte_index_cached(index, anchor_char, anchor_byte)

	_position = index
	_bytepos = byte_index

	return byte_index
end
89
# By escaping `self` to HTML, how many more bytes will be needed ?
#
# `<` and `>` grow by 3 bytes each; `&`, `"`, `'` and `/` grow by 4 bytes each.
fun chars_to_html_escape: Int do
	var bytes = _items
	var extra = 0
	for i in [first_byte .. last_byte] do
		var b = bytes[i]
		if b == 0x3Cu8 or b == 0x3Eu8 then
			extra += 3
		else if b == 0x26u8 or b == 0x22u8 or b == 0x27u8 or b == 0x2Fu8 then
			extra += 4
		end
	end
	return extra
end
115
# Escapes the HTML meta-characters of `self`.
#
# Replacements done:
#
# * 0x3C (<) => &lt;
# * 0x3E (>) => &gt;
# * 0x26 (&) => &amp;
# * 0x22 (") => &#34;
# * 0x27 (') => &#39;
# * 0x2F (/) => &#47;
#
# When nothing has to be escaped, `to_s` is returned.
redef fun html_escape
do
	var growth = chars_to_html_escape
	if growth == 0 then return to_s
	var src = _items
	var out_len = growth + _bytelen
	var dst = new NativeString(out_len)
	var w = 0
	for r in [first_byte .. last_byte] do
		var b = src[r]
		if b == 0x3Cu8 or b == 0x3Eu8 then
			# `&lt;` or `&gt;`: only the second byte differs
			dst[w] = 0x26u8
			dst[w + 1] = if b == 0x3Cu8 then 0x6Cu8 else 0x67u8
			dst[w + 2] = 0x74u8
			dst[w + 3] = 0x3Bu8
			w += 4
		else if b == 0x26u8 then
			# `&amp;`
			dst[w] = 0x26u8
			dst[w + 1] = 0x61u8
			dst[w + 2] = 0x6Du8
			dst[w + 3] = 0x70u8
			dst[w + 4] = 0x3Bu8
			w += 5
		else if b == 0x22u8 or b == 0x27u8 or b == 0x2Fu8 then
			# Numeric entities `&#34;`, `&#39;` and `&#47;`
			dst[w] = 0x26u8
			dst[w + 1] = 0x23u8
			if b == 0x2Fu8 then
				dst[w + 2] = 0x34u8
				dst[w + 3] = 0x37u8
			else
				dst[w + 2] = 0x33u8
				dst[w + 3] = if b == 0x22u8 then 0x34u8 else 0x39u8
			end
			dst[w + 4] = 0x3Bu8
			w += 5
		else
			dst[w] = b
			w += 1
		end
	end
	return new FlatString.with_infos(dst, out_len, 0, out_len - 1)
end
187
# By escaping `self` to C, how many more bytes will be needed ?
#
# A result of 0 means that `self` needs no escaping at all, which
# `escape_to_c` uses to return the text as-is.
fun chars_to_escape_to_c: Int do
	var bytes = _items
	var extra = 0
	for i in [first_byte .. last_byte] do
		var b = bytes[i]
		if b == 0x0Au8 or b == 0x09u8 or b == 0x22u8 or b == 0x27u8 or b == 0x5Cu8 then
			# Two-byte escape sequence: one more byte
			extra += 1
		else if b < 32u8 then
			# Octal escape sequence on four bytes: three more bytes
			extra += 3
		end
	end
	return extra
end
217
# Escapes `self` so that it can be used inside a C string literal.
#
# * 0x09 => \t
# * 0x0A => \n
# * 0x22 => \"
# * 0x27 => \'
# * 0x5C => \\
# * any other byte < 32 => its octal value, written `\0oo`
#
# When nothing has to be escaped, `to_s` is returned.
redef fun escape_to_c do
	var growth = chars_to_escape_to_c
	if growth == 0 then return self.to_s
	var src = _items
	var out_len = _bytelen + growth
	var dst = new NativeString(out_len)
	var w = 0
	for r in [first_byte .. last_byte] do
		var b = src[r]
		if b == 0x09u8 or b == 0x0Au8 then
			# Backslash followed by `t` or `n`
			dst[w] = 0x5Cu8
			dst[w + 1] = if b == 0x09u8 then 0x74u8 else 0x6Eu8
			w += 2
		else if b == 0x22u8 or b == 0x27u8 or b == 0x5Cu8 then
			# Backslash followed by the byte itself
			dst[w] = 0x5Cu8
			dst[w + 1] = b
			w += 2
		else if b < 32u8 then
			# Remaining control bytes: octal digits of the value
			dst[w] = 0x5Cu8
			dst[w + 1] = 0x30u8
			dst[w + 2] = ((b & 0x38u8) >> 3) + 0x30u8
			dst[w + 3] = (b & 0x07u8) + 0x30u8
			w += 4
		else
			dst[w] = b
			w += 1
		end
	end
	return dst.to_s_with_length(out_len)
end
279
redef fun [](index)
do
	var byte_index = char_to_byte_index(index)
	return _items.char_at(byte_index)
end
281
# If `self` contains only digits and alpha <= 'f', return the corresponding integer.
#
#     assert "ff".to_hex == 255
#
# `pos` is the index of the first character to read (0 by default) and
# `ln` the number of characters to read (up to the end by default).
# An empty range yields 0.
#
# The digits are read byte by byte from `_items`: `ln` is used as a
# byte count, which matches the char count for single-byte hex digits.
redef fun to_hex(pos, ln) do
	var res = 0
	if pos == null then pos = 0
	if ln == null then ln = length - pos
	# Nothing to read: return before `char_to_byte_index`, which asserts
	# that its argument designates an existing character.
	if ln <= 0 then return res
	var start = char_to_byte_index(pos)
	var its = _items
	for i in [start .. start + ln[ do
		res <<= 4
		res += its[i].ascii.from_hex
	end
	return res
end
298 end
299
300 # Immutable strings of characters.
301 class FlatString
302 super FlatText
303 super String
304
# Index at which `self` begins in `_items`, inclusively
redef var first_byte is noinit

# Index at which `self` ends in `_items`, inclusively
redef var last_byte is noinit

redef var chars = new FlatStringCharView(self) is lazy

redef var bytes = new FlatStringByteView(self) is lazy

# Number of characters, computed on first use from the bytes of `self`
redef var length is lazy do
	var res = 0
	if _bytelen != 0 then res = _items.utf8_length(_first_byte, _last_byte)
	return res
end

# Null-terminated copy of the bytes of `self`, built on first use
redef var to_cstring is lazy do
	var byte_count = _bytelen
	var copy = new NativeString(byte_count + 1)
	copy[byte_count] = 0u8
	_items.copy_to(copy, byte_count, _first_byte, 0)
	return copy
end
327
# Returns a new string holding the characters of `self` in reverse order.
redef fun reversed
do
	var char_count = length
	var buf = new FlatBuffer.with_capacity(_bytelen + 1)
	var i = char_count - 1
	while i >= 0 do
		buf.add self[i]
		i -= 1
	end
	var res = buf.to_s.as(FlatString)
	# The char count is already known, store it in the result
	res.length = char_count
	return res
end

redef fun fast_cstring
do
	return _items.fast_cstring(_first_byte)
end
340
# Returns the `count` characters of `self` starting at the char index `from`.
#
# A negative `from` is clamped to 0 and shortens `count` accordingly;
# a range running past the end is truncated. The result shares `_items`
# with `self`, no byte is copied.
redef fun substring(from, count)
do
	assert count >= 0

	if from < 0 then
		count = (count + from).max(0)
		from = 0
	end

	var char_count = length
	if (count + from) > char_count then count = char_count - from
	if count <= 0 then return ""

	var native = _items
	var start_byte = char_to_byte_index(from)
	var last_char_byte = char_to_byte_index(from + count - 1)
	# Move from the first to the last byte of the last character
	var end_byte = last_char_byte + native.length_of_char_at(last_char_byte) - 1

	return new FlatString.full(native, end_byte - start_byte + 1, start_byte, end_byte, count)
end

redef fun empty
do
	return "".as(FlatString)
end
365
# Returns a copy of `self` where each character went through `Char::to_upper`.
redef fun to_upper
do
	var res = new FlatBuffer.with_capacity(self._bytelen + 1)
	var char_view = chars
	for i in [0 .. length[ do
		res.add(char_view[i].to_upper)
	end
	return res.to_s
end

# Returns a copy of `self` where each character went through `Char::to_lower`.
redef fun to_lower
do
	var res = new FlatBuffer.with_capacity(self._bytelen + 1)
	var char_view = chars
	for i in [0 .. length[ do
		res.add(char_view[i].to_lower)
	end
	return res.to_s
end

# Outputs the characters of `self` one after the other.
redef fun output
do
	for c in chars do
		c.output
	end
end
400
401 ##################################################
402 # String Specific Methods #
403 ##################################################
404
# Low-level creation of a new string with minimal data.
#
# `_items` will be used as is, without copy, to retrieve the characters of the string.
# Aliasing issues is the responsibility of the caller.
#
# `from` and `to` are the inclusive byte bounds of the string in `items`.
private init with_infos(items: NativeString, bytelen, from, to: Int)
do
	_first_byte = from
	_last_byte = to
	# The byte cache starts on the first byte of the string
	_bytepos = from
	self._bytelen = bytelen
	self._items = items
end

# Low-level creation of a new string with all the data.
#
# `_items` will be used as is, without copy, to retrieve the characters of the string.
# Aliasing issues is the responsibility of the caller.
#
# Same as `with_infos`, with the number of characters `length` given
# by the caller.
private init full(items: NativeString, bytelen, from, to, length: Int)
do
	_first_byte = from
	_last_byte = to
	# The byte cache starts on the first byte of the string
	_bytepos = from
	self._bytelen = bytelen
	self._items = items
	self.length = length
end
431
# Byte-wise equality with another flat text.
#
# Any other kind of object is handled by the inherited implementation.
redef fun ==(other)
do
	if not other isa FlatText then return super

	if self.object_id == other.object_id then return true

	var byte_count = _bytelen
	if other._bytelen != byte_count then return false

	var mine = self._items
	var theirs = other._items
	var my_start = _first_byte
	var its_start = other.first_byte

	for offset in [0 .. byte_count[ do
		if mine[my_start + offset] != theirs[its_start + offset] then return false
	end

	return true
end
458
# Byte-wise lexicographic comparison with another flat text.
#
# When one text is a prefix of the other, the shorter one is the smaller.
# Any other kind of object is handled by the inherited implementation.
redef fun <(other)
do
	if not other isa FlatText then return super

	if self.object_id == other.object_id then return false

	var mine = _items
	var theirs = other._items

	var my_len = _bytelen
	var its_len = other.bytelen
	var common = if my_len < its_len then my_len else its_len

	var my_pos = _first_byte
	var its_pos = other.first_byte
	var stop = my_pos + common

	while my_pos < stop do
		var a = mine[my_pos]
		var b = theirs[its_pos]
		# The first differing byte decides
		if a > b then return false
		if a < b then return true
		my_pos += 1
		its_pos += 1
	end

	return my_len < its_len
end
488
# Concatenation of `self` and `o.to_s` in a newly allocated string.
#
# Aborts if `o.to_s` is not a `FlatText`.
redef fun +(o) do
	var other = o.to_s
	var other_len = other.bytelen
	var my_len = _bytelen
	var total = my_len + other_len
	if not other isa FlatText then abort
	var res = new NativeString(total + 1)
	_items.copy_to(res, my_len, _first_byte, 0)
	other._items.copy_to(res, other_len, other.first_byte, my_len)
	return new FlatString.full(res, total, 0, total - 1, length + o.length)
end
507
# Returns `self` repeated `i` times in a newly allocated, null-terminated string.
redef fun *(i) do
	var chunk = _bytelen
	var total_bytes = chunk * i
	var total_chars = length * i
	var src = _items
	var start = _first_byte
	var res = new NativeString(total_bytes + 1)
	res[total_bytes] = 0u8
	for k in [0 .. i[ do
		src.copy_to(res, chunk, start, k * chunk)
	end
	return new FlatString.full(res, total_bytes, 0, total_bytes - 1, total_chars)
end
525
526
# Hash of the bytes of `self` (djb2 algorithm), cached in `hash_cache`.
redef fun hash
do
	var cached = hash_cache
	if cached != null then return cached

	var bytes = _items
	var h = 5381
	for i in [_first_byte .. _last_byte] do
		h = (h << 5) + h + bytes[i].to_i
	end

	hash_cache = h
	return h
end

redef fun substrings
do
	return new FlatSubstringsIter(self)
end
549 end
550
# Iterator over the characters of a `FlatString`, from `curr_pos` down to 0
private class FlatStringCharReverseIterator
	super IndexedIterator[Char]

	# The string iterated on
	var target: FlatString

	# Index (in chars) of the current character
	var curr_pos: Int

	redef fun index do return curr_pos

	redef fun is_ok do return curr_pos >= 0

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
567
# Iterator over the characters of a `FlatString`, from `curr_pos` to the last one
private class FlatStringCharIterator
	super IndexedIterator[Char]

	# The string iterated on
	var target: FlatString

	# Index of the last character of `target`, set by `init`
	var max: Int is noautoinit

	# Index (in chars) of the current character
	var curr_pos: Int

	init
	do
		max = target.length - 1
	end

	redef fun index do return curr_pos

	redef fun is_ok do return curr_pos <= max

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun next
	do
		curr_pos += 1
	end
end
588
# View of a `FlatString` as a sequence of characters
private class FlatStringCharView
	super StringCharView

	redef type SELFTYPE: FlatString

	redef fun [](index)
	do
		return target[index]
	end

	redef fun iterator_from(start)
	do
		return new FlatStringCharIterator(target, start)
	end

	redef fun reverse_iterator_from(start)
	do
		return new FlatStringCharReverseIterator(target, start)
	end
end
601
# Iterator over the bytes of a `FlatString`, going backwards
private class FlatStringByteReverseIterator
	super IndexedIterator[Byte]

	# The string iterated on
	var target: FlatString

	# Bytes of `target`, set by `init`
	var target_items: NativeString is noautoinit

	# Position of the current byte
	#
	# Given relative to the string at creation, `init` turns it into an
	# absolute position in `target_items`.
	var curr_pos: Int

	init
	do
		var str = target
		curr_pos += str._first_byte
		target_items = str._items
	end

	redef fun index do return curr_pos - target._first_byte

	redef fun is_ok do return curr_pos >= target._first_byte

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
627
# Iterator over the bytes of a `FlatString`, going forward
private class FlatStringByteIterator
	super IndexedIterator[Byte]

	# The string iterated on
	var target: FlatString

	# Bytes of `target`, set by `init`
	var target_items: NativeString is noautoinit

	# Position of the current byte
	#
	# Given relative to the string at creation, `init` turns it into an
	# absolute position in `target_items`.
	var curr_pos: Int

	init
	do
		var str = target
		curr_pos += str._first_byte
		target_items = str._items
	end

	redef fun index do return curr_pos - target._first_byte

	redef fun is_ok do return curr_pos <= target._last_byte

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun next
	do
		curr_pos += 1
	end
end
653
# View of a `FlatString` as a sequence of bytes
private class FlatStringByteView
	super StringByteView

	redef type SELFTYPE: FlatString

	redef fun [](index)
	do
		# `index` is relative to the string: it must be positive and,
		# once shifted by `_first_byte`, not go past `_last_byte`
		assert index >= 0
		var str = self.target
		var absolute = str._first_byte + index
		assert absolute <= str._last_byte
		return str._items[absolute]
	end

	redef fun iterator_from(start)
	do
		return new FlatStringByteIterator(target, start)
	end

	redef fun reverse_iterator_from(start)
	do
		return new FlatStringByteReverseIterator(target, start)
	end
end
675
redef class Buffer
	# Buffers are `FlatBuffer` by default
	redef new
	do
		return new FlatBuffer
	end

	# Same as `new`, with `i` given as the capacity of the `FlatBuffer`
	redef new with_cap(i)
	do
		return new FlatBuffer.with_capacity(i)
	end
end
681
682 # Mutable strings of characters.
683 class FlatBuffer
684 super FlatText
685 super Buffer
686
redef var chars: Sequence[Char] = new FlatBufferCharView(self) is lazy

redef var bytes = new FlatBufferByteView(self) is lazy

redef var length = 0

private var char_cache: Int = -1

private var byte_cache: Int = -1

private var capacity = 0

# Real items, used as cache for when to_cstring is called
private var real_items: NativeString is noinit

redef fun fast_cstring
do
	return _items.fast_cstring(0)
end

redef fun substrings
do
	return new FlatSubstringsIter(self)
end
705
# Re-copies the `NativeString` into a new one and sets it as the new `Buffer`
#
# This happens when an operation modifies the current `Buffer` while
# the Copy-On-Write flag `written` is set to true. The flag is cleared.
private fun reset do
	var used = _bytelen
	var fresh = new NativeString(capacity)
	if used != 0 then _items.copy_to(fresh, used, 0, 0)
	written = false
	_items = fresh
end
716
# Shifts the content of the buffer by `len` bytes to the right, starting at byte `from`
#
# The storage is grown through `enlarge` when it cannot hold `len` more
# bytes; in that case `_items` is replaced, so callers must not keep
# using a reference to `_items` taken before the call.
#
# Internal only, does not modify _bytelen or length, this is the caller's responsibility
private fun rshift_bytes(from: Int, len: Int) do
	var bt = _bytelen
	# No-op when the capacity suffices; otherwise reallocates `_items`,
	# copies the current content and updates `capacity`
	enlarge(bt + len)
	var its = _items
	its.copy_to(its, bt - from, from, from + len)
end

# Shifts the content of the buffer by `len` bytes to the left, starting at `from`
#
# Internal only, does not modify _bytelen or length, this is the caller's responsibility
private fun lshift_bytes(from: Int, len: Int) do
	var it = _items
	it.copy_to(it, _bytelen - from, from, from - len)
end

# Replaces the character at `index` by `item`.
#
# `index == length` is accepted and appends `item`.
# When the UTF-8 encodings of the old and the new characters differ in
# size, the following bytes are shifted accordingly.
redef fun []=(index, item)
do
	assert index >= 0 and index <= length
	if written then reset
	is_dirty = true
	if index == length then
		add item
		return
	end
	var it = _items
	var ip = it.char_to_byte_index(index)
	var c = it.char_at(ip)
	var clen = c.u8char_len
	var itemlen = item.u8char_len
	var size_diff = itemlen - clen
	if size_diff > 0 then
		rshift_bytes(ip + clen, size_diff)
	else if size_diff < 0 then
		lshift_bytes(ip + clen, -size_diff)
	end
	_bytelen += size_diff
	# Only the characters located after `index` have moved: the cached
	# byte position follows only if the cached character is one of them
	if _position > index then _bytepos += size_diff
	# Read `_items` again, `rshift_bytes` may have reallocated it
	_items.set_char_at(ip, item)
end
764
# Appends the character `c`, encoded in UTF-8, at the end of the buffer.
redef fun add(c)
do
	if written then reset
	is_dirty = true
	var end_pos = _bytelen
	var char_size = c.u8char_len
	# Make room before writing, `set_char_at` does no check
	enlarge(end_pos + char_size)
	_items.set_char_at(end_pos, c)
	_bytelen = end_pos + char_size
	length += 1
end
776
# Empties the buffer.
#
# The cached position is reset too: it maps a char index to a byte
# index of the previous content and does not hold for what comes next.
redef fun clear do
	is_dirty = true
	if written then reset
	_bytelen = 0
	length = 0
	_position = 0
	_bytepos = 0
end
783
redef fun empty
do
	return new Buffer
end

# Ensures that the capacity is greater than `cap`.
#
# Nothing is done when `cap` already fits in the current capacity.
# Otherwise the capacity grows by steps of `c * 2 + 2` and the content
# is copied into a newly allocated `NativeString`.
redef fun enlarge(cap)
do
	var new_cap = capacity
	if cap <= new_cap then return
	loop
		new_cap = new_cap * 2 + 2
		if new_cap > cap then break
	end
	# The COW flag can be set at false here, since
	# it does a copy of the current `Buffer`
	written = false
	var fresh = new NativeString(new_cap + 1)
	var used = _bytelen
	if used > 0 then _items.copy_to(fresh, used, 0, 0)
	_items = fresh
	capacity = new_cap
end
803
# Returns a `FlatString` that uses the bytes of the buffer without copy.
#
# `written` is set so that the next modification of the buffer
# works on its own copy.
redef fun to_s
do
	written = true
	var byte_count = _bytelen
	if byte_count == 0 then _items = new NativeString(1)
	var last = byte_count - 1
	return new FlatString.full(_items, byte_count, 0, last, length)
end

# Returns a null-terminated copy of the content.
#
# The copy is kept in `real_items` and rebuilt only when `is_dirty` is set.
redef fun to_cstring
do
	if not is_dirty then return real_items
	var byte_count = _bytelen
	var copy = new NativeString(byte_count + 1)
	copy[byte_count] = 0u8
	if length > 0 then _items.copy_to(copy, byte_count, 0, 0)
	real_items = copy
	is_dirty = false
	return real_items
end
824
# Create a new empty string.
init
do
end

# Low-level creation a new buffer with given data.
#
# `_items` will be used as is, without copy, to store the characters of the buffer.
# Aliasing issues is the responsibility of the caller.
#
# If `_items` is shared, `written` should be set to true after the creation
# so that a modification will do a copy-on-write.
private init with_infos(items: NativeString, capacity, bytelen, length: Int)
do
	self.length = length
	self._bytelen = bytelen
	self.capacity = capacity
	self._items = items
end
842
# Create a new string copied from `s`.
#
# The bytes of each substring of `s` are copied one after the other into
# a newly allocated `NativeString`, starting from the first byte of
# that substring.
init from(s: Text)
do
	var byte_count = s.bytelen
	var its = new NativeString(byte_count + 1)
	var off = 0
	for sub in s.substrings do
		var sub_len = sub._bytelen
		sub._items.copy_to(its, sub_len, sub.first_byte, off)
		off += sub_len
	end
	_items = its
	_bytelen = byte_count
	length = s.length
	_capacity = byte_count
	# The bytes are owned by the buffer, no copy-on-write is pending
	written = false
end
857
# Create a new empty string with a given capacity.
#
# `cap` must be positive; one extra byte is allocated on top of it.
init with_capacity(cap: Int)
do
	assert cap >= 0
	_bytelen = 0
	capacity = cap
	_items = new NativeString(cap + 1)
end
866
# Appends the content of `s` at the end of the buffer.
#
# The bytes of a `FlatText` are copied directly; any other text is
# appended substring by substring.
redef fun append(s)
do
	if s.is_empty then return
	is_dirty = true
	var added = s.bytelen
	var new_len = _bytelen + added
	enlarge(new_len)
	if not s isa FlatText then
		for sub in s.substrings do append sub
		return
	end
	s._items.copy_to(_items, added, s.first_byte, _bytelen)
	_bytelen = new_len
	length += s.length
end
883
# Copies the content of self in `dest`
#
# `len` characters are read from `self`, starting at the index `start`,
# and written into `dest`, starting at the index `new_start`.
fun copy(start: Int, len: Int, dest: Buffer, new_start: Int)
do
	var src_chars = self.chars
	var dst_chars = dest.chars
	for offset in [0 .. len[ do
		dst_chars[new_start + offset] = src_chars[start + offset]
	end
end
893
# Returns a new buffer with the `count` characters starting at the char index `from`.
#
# A negative `from` is replaced by 0 and a range running past the end
# is truncated. The bytes are copied, the result does not share its
# storage with `self`.
redef fun substring(from, count)
do
	assert count >= 0
	if from < 0 then from = 0
	var char_count = length
	if (from + count) > char_count then count = char_count - from
	if count <= 0 then return new Buffer
	var native = _items
	var start_byte = native.char_to_byte_index(from)
	var last_char_byte = native.char_to_byte_index(count + from - 1)
	# Move from the first to the last byte of the last character
	var end_byte = last_char_byte + native.char_at(last_char_byte).u8char_len - 1
	var byte_count = end_byte - start_byte + 1
	var copy = new NativeString(byte_count)
	native.copy_to(copy, byte_count, start_byte, 0)
	return new FlatBuffer.with_infos(copy, byte_count, byte_count, count)
end
909
# Reverses the characters of the buffer in place.
#
# The reversed content is built in a temporary buffer whose storage
# then replaces `_items`.
redef fun reverse
do
	written = false
	# The content changes: the copy cached by `to_cstring` is outdated
	is_dirty = true
	var ns = new FlatBuffer.with_capacity(capacity)
	for i in chars.reverse_iterator do ns.add i
	_items = ns._items
end
917
# Repeats the content so that it appears `repeats` times in the buffer.
#
# The current content is appended `repeats - 1` times to itself.
redef fun times(repeats)
do
	var byte_count = _bytelen
	var snapshot = new FlatString.full(_items, byte_count, 0, byte_count - 1, length)
	var left = repeats - 1
	while left > 0 do
		append(snapshot)
		left -= 1
	end
end

# Replaces each character by the result of `Char::to_upper`.
redef fun upper
do
	if written then reset
	for i in [0 .. length[ do
		var c = self[i]
		self[i] = c.to_upper
	end
end

# Replaces each character by the result of `Char::to_lower`.
redef fun lower
do
	if written then reset
	for i in [0 .. length[ do
		var c = self[i]
		self[i] = c.to_lower
	end
end
938 end
939
# Iterator over the bytes of a `FlatBuffer`, from `curr_pos` down to 0
private class FlatBufferByteReverseIterator
	super IndexedIterator[Byte]

	# The buffer iterated on
	var target: FlatBuffer

	# Bytes of `target` as they were when the iterator was created
	var target_items: NativeString is noautoinit

	# Index of the current byte
	var curr_pos: Int

	init
	do
		target_items = target._items
	end

	redef fun index do return curr_pos

	redef fun is_ok do return curr_pos >= 0

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
960
# View of a `FlatBuffer` as a sequence of bytes
private class FlatBufferByteView
	super BufferByteView

	redef type SELFTYPE: FlatBuffer

	redef fun [](index)
	do
		return target._items[index]
	end

	redef fun iterator_from(pos)
	do
		return new FlatBufferByteIterator(target, pos)
	end

	redef fun reverse_iterator_from(pos)
	do
		return new FlatBufferByteReverseIterator(target, pos)
	end
end
973
# Iterator over the bytes of a `FlatBuffer`, from `curr_pos` to the last one
private class FlatBufferByteIterator
	super IndexedIterator[Byte]

	# The buffer iterated on
	var target: FlatBuffer

	# Bytes of `target` as they were when the iterator was created
	var target_items: NativeString is noautoinit

	# Index of the current byte
	var curr_pos: Int

	init
	do
		target_items = target._items
	end

	redef fun index do return curr_pos

	redef fun is_ok do return curr_pos < target._bytelen

	redef fun item
	do
		return target_items[curr_pos]
	end

	redef fun next
	do
		curr_pos += 1
	end
end
994
# Iterator over the characters of a `FlatBuffer`, from `curr_pos` down to 0
private class FlatBufferCharReverseIterator
	super IndexedIterator[Char]

	# The buffer iterated on
	var target: FlatBuffer

	# Index (in chars) of the current character
	var curr_pos: Int

	redef fun index do return curr_pos

	redef fun is_ok do return curr_pos >= 0

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun next
	do
		curr_pos -= 1
	end
end
1011
# View of a `FlatBuffer` as a mutable sequence of characters
private class FlatBufferCharView
	super BufferCharView

	redef type SELFTYPE: FlatBuffer

	redef fun [](index)
	do
		return target[index]
	end

	# Replaces the character at `index`; `index == length` appends `item`
	redef fun []=(index, item)
	do
		assert index >= 0 and index <= length
		if index != length then
			target[index] = item
		else
			add(item)
		end
	end

	redef fun push(c)
	do
		target.add(c)
	end

	redef fun add(c)
	do
		target.add(c)
	end

	# Forwards the request to the underlying buffer
	fun enlarge(cap: Int)
	do
		target.enlarge(cap)
	end

	# Adds the characters of `s`, one by one, at the end of the buffer
	redef fun append(s)
	do
		var added = s.length
		if target.capacity < s.length then enlarge(added + target.length)
		for c in s do
			target.add c
		end
	end

	redef fun iterator_from(pos)
	do
		return new FlatBufferCharIterator(target, pos)
	end

	redef fun reverse_iterator_from(pos)
	do
		return new FlatBufferCharReverseIterator(target, pos)
	end
end
1056
# Iterator over the characters of a `FlatBuffer`, from `curr_pos` to the last one
private class FlatBufferCharIterator
	super IndexedIterator[Char]

	# The buffer iterated on
	var target: FlatBuffer

	# Index of the last character of `target` when the iterator was created
	var max: Int is noautoinit

	# Index (in chars) of the current character
	var curr_pos: Int

	init
	do
		max = target.length - 1
	end

	redef fun index do return curr_pos

	redef fun is_ok do return curr_pos <= max

	redef fun item
	do
		return target[curr_pos]
	end

	redef fun next
	do
		curr_pos += 1
	end
end
1077
1078 redef class NativeString
# Returns `self` as a String, the length being the one of the C string.
redef fun to_s
do
	var byte_count = cstring_length
	return to_s_with_length(byte_count)
end

# Returns `self` as a String of `length`.
#
# `length` is a number of bytes and must be positive; the bytes go
# through `clean_utf8`.
redef fun to_s_with_length(length): FlatString
do
	assert length >= 0
	var res = clean_utf8(length)
	return res
end

# Returns `self` as a String of `bytelen` bytes and `unilen` characters,
# used as is: nothing is checked nor copied.
redef fun to_s_full(bytelen, unilen) do
	var last = bytelen - 1
	return new FlatString.full(self, bytelen, 0, last, unilen)
end
1094
# Returns `self` as a new String.
#
# When `clean_utf8` already produced new bytes its result is returned;
# otherwise the bytes are copied into a new null-terminated
# `NativeString`, also stored as the `to_cstring` of the result.
redef fun to_s_with_copy: FlatString
do
	var byte_count = cstring_length
	var cleaned = clean_utf8(byte_count)
	if cleaned.items != self then return cleaned
	var copy = new NativeString(byte_count + 1)
	copy_to(copy, byte_count, 0, 0)
	copy[byte_count] = 0u8
	var res = new FlatString.with_infos(copy, byte_count, 0, byte_count - 1)
	res.to_cstring = copy
	return res
end
1108
# Cleans a NativeString if necessary
#
# The first `len` bytes are scanned. A byte that does not start a valid
# UTF-8 sequence, or a sequence that decodes to a rejected code point,
# is replaced by the three bytes 0xEF 0xBF 0xBD.
#
# When nothing has to be replaced the returned string uses `self`
# directly, otherwise it uses a newly allocated `NativeString`.
fun clean_utf8(len: Int): FlatString do
	var replacements: nullable Array[Int] = null
	var end_length = len
	var pos = 0
	var chr_ln = 0
	while pos < len do
		var b = self[pos]
		var nxst = length_of_char_at(pos)

		# Is the leading byte consistent with the announced length ?
		var valid: Bool
		if nxst == 1 then
			valid = b & 0x80u8 == 0u8
		else if nxst == 2 then
			valid = b & 0xE0u8 == 0xC0u8
		else if nxst == 3 then
			valid = b & 0xF0u8 == 0xE0u8
		else
			valid = b & 0xF8u8 == 0xF0u8
		end

		# Number of bytes consumed by this step
		var advance = 1
		if valid then
			# Is the code point in the range of a sequence of that length ?
			var c = char_at(pos)
			var cp = c.code_point
			if nxst == 1 then
				valid = cp >= 0 and cp <= 0x7F
			else if nxst == 2 then
				valid = cp >= 0x80 and cp <= 0x7FF
			else if nxst == 3 then
				valid = cp >= 0x800 and cp <= 0xFFFF
				valid = valid and not (cp >= 0xD800 and cp <= 0xDFFF) and cp != 0xFFFE and cp != 0xFFFF
			else
				valid = cp >= 0x10000 and cp <= 0x10FFFF
			end
			if valid then advance = c.u8char_len
		end

		if not valid then
			# One byte is replaced by three: two more bytes are needed
			if replacements == null then replacements = new Array[Int]
			replacements.add pos
			end_length += 2
		end
		pos += advance
		chr_ln += 1
	end

	var ret = self
	if end_length != len then
		ret = new NativeString(end_length)
		# Start, in `self`, of the chunk still to be copied
		var chunk_start = 0
		# Write position in `ret`
		var off = 0
		var repls = replacements.as(not null)
		var positions = repls.items.as(not null)
		for i in [0 .. repls.length[ do
			var bad_pos = positions[i]
			var chunk_len = bad_pos - chunk_start
			copy_to(ret, chunk_len, chunk_start, off)
			off += chunk_len
			ret[off] = 0xEFu8
			ret[off + 1] = 0xBFu8
			ret[off + 2] = 0xBDu8
			off += 3
			chunk_start = bad_pos + 1
		end
		# Bytes located after the last replacement
		copy_to(ret, len - chunk_start, chunk_start, off)
	end
	return new FlatString.full(ret, end_length, 0, end_length - 1, chr_ln)
end
1183
# Sets the next bytes at position `pos` to the value of `c`, encoded in UTF-8
#
# Very unsafe, make sure to have room for this char prior to calling this function.
private fun set_char_at(pos: Int, c: Char) do
	native_set_char(pos, c, c.u8char_len)
end

# Writes `c` at `pos` as a UTF-8 sequence of `ln` bytes
#
# Nothing is written when `ln` is not between 1 and 4.
private fun native_set_char(pos: Int, c: Char, ln: Int) `{
	char* dst = self + pos;
	if (ln == 1) {
		dst[0] = c;
	} else if (ln == 2) {
		dst[0] = 0xC0 | ((c >> 6) & 0x1F);
		dst[1] = 0x80 | (c & 0x3F);
	} else if (ln == 3) {
		dst[0] = 0xE0 | ((c >> 12) & 0x0F);
		dst[1] = 0x80 | ((c >> 6) & 0x3F);
		dst[2] = 0x80 | (c & 0x3F);
	} else if (ln == 4) {
		dst[0] = 0xF0 | ((c >> 18) & 0x07);
		dst[1] = 0x80 | ((c >> 12) & 0x3F);
		dst[2] = 0x80 | ((c >> 6) & 0x3F);
		dst[3] = 0x80 | (c & 0x3F);
	}
`}
1215 end
1216
redef class Int
	# Fills a buffer of `digit_count(base)` spaces through `fill_buffer`
	redef fun to_base(base, signed)
	do
		var width = digit_count(base)
		var buf = new FlatBuffer.from(" " * width)
		fill_buffer(buf, base, signed)
		return buf.to_s
	end

	# return displayable int in base 10 and signed
	#
	#     assert 1.to_s == "1"
	#     assert (-123).to_s == "-123"
	redef fun to_s do
		# Fast case for common numbers
		if self == 0 then return "0"
		if self == 1 then return "1"

		var byte_count = int_to_s_len
		var native = new NativeString(byte_count + 1)
		native[byte_count] = 0u8
		native_int_to_s(native, byte_count + 1)
		# Every character takes one byte: same length in bytes and in chars
		return new FlatString.full(native, byte_count, 0, byte_count - 1, byte_count)
	end
end
1242
redef class Array[E]

	# Fast implementation
	#
	# The `to_s` of the non-null elements are computed first, then their
	# bytes are copied side by side into a single `NativeString`.
	redef fun plain_to_s
	do
		var count = length
		if count == 0 then return ""
		var elems = _items.as(not null)
		var first = elems[0]
		if count == 1 then
			if first == null then return ""
			return first.to_s
		end

		# First pass: collect the strings and the total number of bytes
		var parts = new NativeArray[String](count)
		var total = 0
		var kept = 0
		for i in [0 .. count[ do
			var elem = elems[i]
			if elem == null then continue
			var str = elem.to_s
			total += str.bytelen
			parts[kept] = str
			kept += 1
		end

		# Second pass: copy the bytes of each string
		var native = new NativeString(total + 1)
		native[total] = 0u8
		var off = 0
		for i in [0 .. kept[ do
			var part = parts[i]
			if part isa FlatString then
				var part_len = part._bytelen
				part._items.copy_to(native, part_len, part._first_byte, off)
				off += part_len
			else
				for sub in part.substrings do
					var flat = sub.as(FlatString)
					var flat_len = flat._bytelen
					flat._items.copy_to(native, flat_len, flat._first_byte, off)
					off += flat_len
				end
			end
		end
		return new FlatString.with_infos(native, total, 0, total - 1)
	end
end
1292
redef class NativeArray[E]
	# Concatenation of the strings of `self` into a single `FlatString`
	#
	# `self` must be a `NativeArray[String]`.
	redef fun native_to_s do
		assert self isa NativeArray[String]
		var count = length
		var parts = self

		# First pass: total number of bytes
		var total = 0
		for i in [0 .. count[ do
			total += parts[i].bytelen
		end

		# Second pass: copy the bytes of each string
		var native = new NativeString(total + 1)
		native[total] = 0u8
		var off = 0
		for i in [0 .. count[ do
			var part = parts[i]
			if part isa FlatString then
				var part_len = part._bytelen
				part._items.copy_to(native, part_len, part._first_byte, off)
				off += part_len
			else
				for sub in part.substrings do
					var flat = sub.as(FlatString)
					var flat_len = flat._bytelen
					flat._items.copy_to(native, flat_len, flat._first_byte, off)
					off += flat_len
				end
			end
		end
		return new FlatString.with_infos(native, total, 0, total - 1)
	end
end
1329
redef class Map[K,V]
	# Concatenation of the couples of `self`
	#
	# Key and value are separated by `couple_sep`, couples are separated
	# by `sep`. A null key or value is written `<null>`.
	redef fun join(sep, couple_sep)
	do
		if is_empty then return ""

		var res = new Buffer
		var it = iterator
		var is_first = true
		while it.is_ok do
			# The separator only goes between two couples
			if is_first then is_first = false else res.append(sep)
			var k = it.key
			var e = it.item
			res.append("{k or else "<null>"}{couple_sep}{e or else "<null>"}")
			it.next
		end
		return res.to_s
	end
end
1353 end
1354 end