1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
11 # All the array-based text representations
14 intrude import abstract_text
22 private class FlatSubstringsIter
23 super Iterator[FlatText]
25 var tgt
: nullable FlatText
29 return tgt
.as(not null)
32 redef fun is_ok
do return tgt
!= null
34 redef fun next
do tgt
= null
39 # First byte of the NativeString
40 protected fun first_byte
: Int do return 0
42 # Last byte of the NativeString
43 protected fun last_byte
: Int do return first_byte
+ _byte_length
- 1
45 # Cache of the latest position (char) explored in the string
48 # Cached position (bytes) in the NativeString underlying the String
51 # Index of the character `index` in `_items`
52 fun char_to_byte_index
(index
: Int): Int do
53 var dpos
= index
- _position
58 if its
[b
] & 0x80u
8 == 0x00u
8 then
61 b
+= its
.length_of_char_at
(b
)
68 b
= its
.find_beginning_of_char_at
(b
- 1)
73 if dpos
== 0 then return b
77 # Find best insertion point
78 var delta_begin
= index
79 var delta_end
= (ln
- 1) - index
80 var delta_cache
= (pos
- index
).abs
83 if delta_cache
< min
then min
= delta_cache
84 if delta_end
< min
then min
= delta_end
89 if min
== delta_cache
then
92 else if min
== delta_begin
then
96 ns_i
= its
.find_beginning_of_char_at
(last_byte
)
100 ns_i
= its
.char_to_byte_index_cached
(index
, my_i
, ns_i
)
108 # By escaping `self` to HTML, how many more bytes will be needed ?
109 fun chars_to_html_escape
: Int do
118 else if c
== 0x3Eu
8 then
120 else if c
== 0x26u
8 then
122 else if c
== 0x22u
8 then
124 else if c
== 0x27u
8 then
126 else if c
== 0x2Fu
8 then
134 redef fun html_escape
136 var extra
= chars_to_html_escape
137 if extra
== 0 then return to_s
141 var nlen
= extra
+ _byte_length
142 var nits
= new NativeString(nlen
)
147 # Some HTML characters are used as meta-data, they need
148 # to be replaced by an HTML-Escaped equivalent
152 # * 0x26 (&) => &amp;
153 # * 0x22 (") => &#34;
154 # * 0x27 (') => &#39;
155 # * 0x2F (/) => &#47;
157 nits
[outpos
] = 0x26u
8
158 nits
[outpos
+ 1] = 0x6Cu
8
159 nits
[outpos
+ 2] = 0x74u
8
160 nits
[outpos
+ 3] = 0x3Bu
8
162 else if c
== 0x3Eu
8 then
163 nits
[outpos
] = 0x26u
8
164 nits
[outpos
+ 1] = 0x67u
8
165 nits
[outpos
+ 2] = 0x74u
8
166 nits
[outpos
+ 3] = 0x3Bu
8
168 else if c
== 0x26u
8 then
169 nits
[outpos
] = 0x26u
8
170 nits
[outpos
+ 1] = 0x61u
8
171 nits
[outpos
+ 2] = 0x6Du
8
172 nits
[outpos
+ 3] = 0x70u
8
173 nits
[outpos
+ 4] = 0x3Bu
8
175 else if c
== 0x22u
8 then
176 nits
[outpos
] = 0x26u
8
177 nits
[outpos
+ 1] = 0x23u
8
178 nits
[outpos
+ 2] = 0x33u
8
179 nits
[outpos
+ 3] = 0x34u
8
180 nits
[outpos
+ 4] = 0x3Bu
8
182 else if c
== 0x27u
8 then
183 nits
[outpos
] = 0x26u
8
184 nits
[outpos
+ 1] = 0x23u
8
185 nits
[outpos
+ 2] = 0x33u
8
186 nits
[outpos
+ 3] = 0x39u
8
187 nits
[outpos
+ 4] = 0x3Bu
8
189 else if c
== 0x2Fu
8 then
190 nits
[outpos
] = 0x26u
8
191 nits
[outpos
+ 1] = 0x23u
8
192 nits
[outpos
+ 2] = 0x34u
8
193 nits
[outpos
+ 3] = 0x37u
8
194 nits
[outpos
+ 4] = 0x3Bu
8
202 var s
= new FlatString.with_infos
(nits
, nlen
, 0)
206 # By escaping `self` to C, how many more bytes will be needed ?
208 # This enables a double-optimization in `escape_to_c` since if this
209 # method returns 0, then `self` does not need escaping and can be
211 fun chars_to_escape_to_c
: Int do
220 else if c
== 0x09u
8 then
222 else if c
== 0x22u
8 then
224 else if c
== 0x27u
8 then
226 else if c
== 0x5Cu
8 then
228 else if c
== 0x3Fu
8 then
232 # We ignore `??'` because it will be escaped as `??\'`.
244 else if c
< 32u8
then
252 redef fun escape_to_c
do
253 var ln_extra
= chars_to_escape_to_c
254 if ln_extra
== 0 then return self.to_s
257 var nlen
= _byte_length
+ ln_extra
258 var nns
= new NativeString(nlen
)
265 # Any byte with value < 32 is a control character
266 # All their uses will be replaced by their octal
269 # There are two exceptions however:
274 # Aside from the code points above, the following are:
281 nns
[opos
+ 1] = 0x74u
8
283 else if c
== 0x0Au
8 then
285 nns
[opos
+ 1] = 0x6Eu
8
287 else if c
== 0x22u
8 then
289 nns
[opos
+ 1] = 0x22u
8
291 else if c
== 0x27u
8 then
293 nns
[opos
+ 1] = 0x27u
8
295 else if c
== 0x5Cu
8 then
297 nns
[opos
+ 1] = 0x5Cu
8
299 else if c
== 0x3Fu
8 then
303 # We ignore `??'` because it will be escaped as `??\'`.
320 else if c
< 32u8
then
322 nns
[opos
+ 1] = 0x30u
8
323 nns
[opos
+ 2] = ((c
& 0x38u
8) >> 3) + 0x30u
8
324 nns
[opos
+ 3] = (c
& 0x07u
8) + 0x30u
8
332 return nns
.to_s_unsafe
(nlen
)
335 redef fun [](index
) do
339 # * ~70% want the next char
340 # * ~23% want the previous
341 # * ~7% want the same char
343 # So it makes sense to shortcut early. And early is here.
344 var dpos
= index
- _position
346 if dpos
== 1 and index
< len
- 1 then
349 if c
& 0x80u
8 == 0x00u
8 then
350 # We want the next, and current is easy.
351 # So next is easy to find!
355 # The rest will be done by `dpos==0` below.
358 else if dpos
== -1 and index
> 1 then
361 if c
& 0x80u
8 == 0x00u
8 then
362 # We want the previous, and it is easy.
371 # We know what we want (+0 or +1) just get it now!
374 if c
& 0x80u
8 == 0x00u
8 then return c
.ascii
375 return items
.char_at
(b
)
378 assert index
>= 0 and index
< len
379 return fetch_char_at
(index
)
382 # Gets a `Char` at `index` in `self`
384 # WARNING: Use at your own risk, as no bounds checking is done
385 fun fetch_char_at
(index
: Int): Char do
386 var i
= char_to_byte_index
(index
)
389 if b
& 0x80u
8 == 0x00u
8 then return b
.ascii
390 return items
.char_at
(i
)
393 # If `self` contains only digits and alpha <= 'f', return the corresponding integer.
395 # assert "ff".to_hex == 255
396 redef fun to_hex
(pos
, ln
) do
398 if pos
== null then pos
= 0
399 if ln
== null then ln
= length
- pos
400 pos
= char_to_byte_index
(pos
)
403 for i
in [pos
.. max
[ do
405 res
+= its
[i
].ascii
.from_hex
410 redef fun copy_to_native
(dst
, n
, src_off
, dst_off
) do
411 _items
.copy_to
(dst
, n
, first_byte
+ src_off
, dst_off
)
415 # Immutable strings of characters.
416 abstract class FlatString
420 # Index at which `self` begins in `_items`, inclusively
421 redef var first_byte
is noinit
423 redef fun chars
do return new FlatStringCharView(self)
425 redef fun bytes
do return new FlatStringByteView(self)
427 redef fun to_cstring
do
428 var blen
= _byte_length
429 var new_items
= new NativeString(blen
+ 1)
430 _items
.copy_to
(new_items
, blen
, _first_byte
, 0)
431 new_items
[blen
] = 0u8
435 redef fun reversed
do
436 var b
= new FlatBuffer.with_capacity
(_byte_length
+ 1)
439 b
.add
self.fetch_char_at
(i
)
442 var s
= b
.to_s
.as(FlatString)
443 s
._length
= self._length
447 redef fun fast_cstring
do return _items
.fast_cstring
(_first_byte
)
449 redef fun substring
(from
, count
)
451 if count
<= 0 then return ""
455 if count
<= 0 then return ""
460 if (count
+ from
) > ln
then count
= ln
- from
461 if count
<= 0 then return ""
462 var end_index
= from
+ count
- 1
463 return substring_impl
(from
, count
, end_index
)
466 private fun substring_impl
(from
, count
, end_index
: Int): String do
467 var cache
= _position
468 var dfrom
= (cache
- from
).abs
469 var dend
= (end_index
- from
).abs
474 bytefrom
= char_to_byte_index
(from
)
475 byteto
= char_to_byte_index
(end_index
)
477 byteto
= char_to_byte_index
(end_index
)
478 bytefrom
= char_to_byte_index
(from
)
482 byteto
+= its
.length_of_char_at
(byteto
) - 1
484 var s
= new FlatString.full
(its
, byteto
- bytefrom
+ 1, bytefrom
, count
)
488 redef fun empty
do return "".as(FlatString)
492 var outstr
= new FlatBuffer.with_capacity
(self._byte_length
+ 1)
498 outstr
.add
(chars
[pos
].to_upper
)
507 var outstr
= new FlatBuffer.with_capacity
(self._byte_length
+ 1)
513 outstr
.add
(chars
[pos
].to_lower
)
522 for i
in chars
do i
.output
525 ##################################################
526 # String Specific Methods #
527 ##################################################
529 # Low-level creation of a new string with minimal data.
531 # `_items` will be used as is, without copy, to retrieve the characters of the string.
532 # Aliasing issues are the responsibility of the caller.
533 private new with_infos
(items
: NativeString, byte_length
, from
: Int)
535 var len
= items
.utf8_length
(from
, byte_length
)
536 if byte_length
== len
then return new ASCIIFlatString.full_data
(items
, byte_length
, from
, len
)
537 return new UnicodeFlatString.full_data
(items
, byte_length
, from
, len
)
540 # Low-level creation of a new string with all the data.
542 # `_items` will be used as is, without copy, to retrieve the characters of the string.
543 # Aliasing issues are the responsibility of the caller.
544 private new full
(items
: NativeString, byte_length
, from
, length
: Int)
546 if byte_length
== length
then return new ASCIIFlatString.full_data
(items
, byte_length
, from
, length
)
547 return new UnicodeFlatString.full_data
(items
, byte_length
, from
, length
)
552 if not other
isa FlatText then return super
554 if self.object_id
== other
.object_id
then return true
556 var my_length
= _byte_length
558 if other
._byte_length
!= my_length
then return false
560 var my_index
= _first_byte
561 var its_index
= other
.first_byte
563 var last_iteration
= my_index
+ my_length
565 var its_items
= other
._items
566 var my_items
= self._items
568 while my_index
< last_iteration
do
569 if my_items
[my_index
] != its_items
[its_index
] then return false
579 if not other
isa FlatText then return super
581 if self.object_id
== other
.object_id
then return false
584 var itsits
= other
._items
586 var mbt
= _byte_length
587 var obt
= other
.byte_length
589 var minln
= if mbt
< obt
then mbt
else obt
590 var mst
= _first_byte
591 var ost
= other
.first_byte
593 for i
in [0 .. minln
[ do
594 var my_curr_char
= myits
[mst
]
595 var its_curr_char
= itsits
[ost
]
597 if my_curr_char
> its_curr_char
then return false
598 if my_curr_char
< its_curr_char
then return true
609 var slen
= s
.byte_length
610 var mlen
= _byte_length
611 var nlen
= mlen
+ slen
613 var mifrom
= _first_byte
614 if s
isa FlatText then
616 var sifrom
= s
.first_byte
617 var ns
= new NativeString(nlen
+ 1)
618 mits
.copy_to
(ns
, mlen
, mifrom
, 0)
619 sits
.copy_to
(ns
, slen
, sifrom
, mlen
)
620 return new FlatString.full
(ns
, nlen
, 0, _length
+ o
.length
)
627 var mybtlen
= _byte_length
628 var new_byte_length
= mybtlen
* i
630 var newlen
= mylen
* i
633 var ns
= new NativeString(new_byte_length
+ 1)
634 ns
[new_byte_length
] = 0u8
637 its
.copy_to
(ns
, mybtlen
, fb
, offset
)
641 return new FlatString.full
(ns
, new_byte_length
, 0, newlen
)
646 if hash_cache
== null then
647 # djb2 hash algorithm
651 var my_items
= _items
655 h
= (h
<< 5) + h
+ my_items
[i
].to_i
662 return hash_cache
.as(not null)
665 redef fun substrings
do return new FlatSubstringsIter(self)
668 # Regular Nit UTF-8 strings
669 private class UnicodeFlatString
672 init full_data
(items
: NativeString, byte_length
, from
, length
: Int) do
674 self._length
= length
675 self._byte_length
= byte_length
680 redef fun substring_from
(from
) do
681 if from
>= self._length
then return empty
682 if from
<= 0 then return self
683 var c
= char_to_byte_index
(from
)
684 var st
= c
- _first_byte
685 var fln
= byte_length
- st
686 return new FlatString.full
(items
, fln
, c
, _length
- from
)
690 # Special cases of String where all the characters are ASCII-based
692 # Optimizes access operations to O(1) complexity.
693 private class ASCIIFlatString
696 init full_data
(items
: NativeString, byte_length
, from
, length
: Int) do
698 self._length
= length
699 self._byte_length
= byte_length
705 assert idx
< _byte_length
and idx
>= 0
706 return _items
[idx
+ _first_byte
].ascii
709 redef fun substring
(from
, count
) do
711 if count
<= 0 then return ""
712 if (count
+ from
) > ln
then count
= ln
- from
713 if count
<= 0 then return ""
716 if count
<= 0 then return ""
719 return new ASCIIFlatString.full_data
(_items
, count
, from
+ _first_byte
, count
)
722 redef fun reversed
do
723 var b
= new FlatBuffer.with_capacity
(_byte_length
+ 1)
729 var s
= b
.to_s
.as(FlatString)
733 redef fun char_to_byte_index
(index
) do return index
+ _first_byte
735 redef fun substring_impl
(from
, count
, end_index
) do
736 return new ASCIIFlatString.full_data
(_items
, count
, from
+ _first_byte
, count
)
739 redef fun fetch_char_at
(i
) do return _items
[i
+ _first_byte
].ascii
742 private class FlatStringCharReverseIterator
743 super IndexedIterator[Char]
745 var target
: FlatString
749 redef fun is_ok
do return curr_pos
>= 0
751 redef fun item
do return target
[curr_pos
]
753 redef fun next
do curr_pos
-= 1
755 redef fun index
do return curr_pos
759 private class FlatStringCharIterator
760 super IndexedIterator[Char]
762 var target
: FlatString
764 var max
: Int is noautoinit
768 init do max
= target
._length
- 1
770 redef fun is_ok
do return curr_pos
<= max
772 redef fun item
do return target
[curr_pos
]
774 redef fun next
do curr_pos
+= 1
776 redef fun index
do return curr_pos
780 private class FlatStringCharView
783 redef type SELFTYPE: FlatString
785 redef fun [](index
) do return target
[index
]
787 redef fun iterator_from
(start
) do return new FlatStringCharIterator(target
, start
)
789 redef fun reverse_iterator_from
(start
) do return new FlatStringCharReverseIterator(target
, start
)
793 private class FlatStringByteReverseIterator
794 super IndexedIterator[Byte]
796 var target
: FlatString
798 var target_items
: NativeString is noautoinit
805 target_items
= tgt
._items
806 curr_pos
+= tgt
._first_byte
809 redef fun is_ok
do return curr_pos
>= target
._first_byte
811 redef fun item
do return target_items
[curr_pos
]
813 redef fun next
do curr_pos
-= 1
815 redef fun index
do return curr_pos
- target
._first_byte
819 private class FlatStringByteIterator
820 super IndexedIterator[Byte]
822 var target
: FlatString
824 var target_items
: NativeString is noautoinit
831 target_items
= tgt
._items
832 curr_pos
+= tgt
._first_byte
835 redef fun is_ok
do return curr_pos
<= target
.last_byte
837 redef fun item
do return target_items
[curr_pos
]
839 redef fun next
do curr_pos
+= 1
841 redef fun index
do return curr_pos
- target
._first_byte
845 private class FlatStringByteView
848 redef type SELFTYPE: FlatString
852 # Check that the index (+ _first_byte) is not larger than last_byte
853 # In other words, check that the index is valid
855 assert index
>= 0 and index
< target
._byte_length
856 var ind
= index
+ target
._first_byte
857 return target
._items
[ind
]
860 redef fun iterator_from
(start
) do return new FlatStringByteIterator(target
, start
)
862 redef fun reverse_iterator_from
(start
) do return new FlatStringByteReverseIterator(target
, start
)
867 redef new do return new FlatBuffer
869 redef new with_cap
(i
) do return new FlatBuffer.with_capacity
(i
)
872 # Mutable strings of characters.
877 redef fun chars
do return new FlatBufferCharView(self)
879 redef fun bytes
do return new FlatBufferByteView(self)
881 private var capacity
= 0
883 redef fun fast_cstring
do return _items
.fast_cstring
(0)
885 redef fun substrings
do return new FlatSubstringsIter(self)
887 # Re-copies the `NativeString` into a new one and sets it as the new `Buffer`
889 # This happens when an operation modifies the current `Buffer` and
890 # the Copy-On-Write flag `written` is set to true.
892 var nns
= new NativeString(capacity
)
893 if _byte_length
!= 0 then _items
.copy_to
(nns
, _byte_length
, 0, 0)
898 # Shifts the content of the buffer by `len` bytes to the right, starting at byte `from`
900 # Internal only, does not modify _byte_length or length, this is the caller's responsibility
901 private fun rshift_bytes
(from
: Int, len
: Int) do
904 var bt
= _byte_length
905 if bt
+ len
> capacity
then
906 capacity
= capacity
* 2 + 2
907 nit
= new NativeString(capacity
)
908 oit
.copy_to
(nit
, 0, 0, from
)
910 oit
.copy_to
(nit
, bt
- from
, from
, from
+ len
)
913 # Shifts the content of the buffer by `len` bytes to the left, starting at `from`
915 # Internal only, does not modify _byte_length or length, this is the caller's responsibility
916 private fun lshift_bytes
(from
: Int, len
: Int) do
918 it
.copy_to
(it
, _byte_length
- from
, from
, from
- len
)
921 redef fun []=(index
, item
)
923 assert index
>= 0 and index
<= _length
924 if written
then reset
925 if index
== _length
then
930 var ip
= it
.char_to_byte_index
(index
)
931 var c
= it
.char_at
(ip
)
932 var clen
= c
.u8char_len
933 var itemlen
= item
.u8char_len
934 var size_diff
= itemlen
- clen
935 if size_diff
> 0 then
936 rshift_bytes
(ip
+ clen
, size_diff
)
937 else if size_diff
< 0 then
938 lshift_bytes
(ip
+ clen
, -size_diff
)
940 _byte_length
+= size_diff
941 it
.set_char_at
(ip
, item
)
944 redef fun insert
(s
, pos
) do
945 assert pos
>= 0 and pos
<= length
946 if pos
== length
then
950 var slen
= s
.byte_length
951 enlarge
(byte_length
+ slen
)
953 var shpos
= it
.char_to_byte_index
(pos
)
954 rshift_bytes
(shpos
, slen
)
955 s
.copy_to_native
(it
, slen
, 0, shpos
)
960 redef fun insert_char
(c
, pos
) do
961 assert pos
>= 0 and pos
<= length
962 if pos
== length
then
966 var clen
= c
.u8char_len
967 enlarge
(byte_length
+ clen
)
969 var shpos
= it
.char_to_byte_index
(pos
)
970 rshift_bytes
(shpos
, clen
)
971 it
.set_char_at
(shpos
, c
)
978 if written
then reset
979 var clen
= c
.u8char_len
980 var bt
= _byte_length
982 _items
.set_char_at
(bt
, c
)
996 redef fun empty
do return new Buffer
998 redef fun enlarge
(cap
)
1001 if cap
<= c
then return
1002 if c
<= 16 then c
= 16
1003 while c
<= cap
do c
= c
* 2
1004 # The COW flag can be set to false here, since
1005 # it does a copy of the current `Buffer`
1007 var bln
= _byte_length
1008 var a
= new NativeString(c
)
1011 if bln
> 0 then it
.copy_to
(a
, bln
, 0, 0)
1020 var bln
= _byte_length
1021 if bln
== 0 then _items
= new NativeString(1)
1022 return new FlatString.full
(_items
, bln
, 0, _length
)
1025 redef fun to_cstring
1027 var bln
= _byte_length
1028 var new_native
= new NativeString(bln
+ 1)
1029 new_native
[bln
] = 0u8
1030 if _length
> 0 then _items
.copy_to
(new_native
, bln
, 0, 0)
1034 # Create a new empty string.
1037 # Low-level creation of a new buffer with given data.
1039 # `_items` will be used as is, without copy, to store the characters of the buffer.
1040 # Aliasing issues are the responsibility of the caller.
1042 # If `_items` is shared, `written` should be set to true after the creation
1043 # so that a modification will do a copy-on-write.
1044 private init with_infos
(items
: NativeString, capacity
, byte_length
, length
: Int)
1047 self.capacity
= capacity
1048 self._byte_length
= byte_length
1049 self._length
= length
1052 # Create a new string copied from `s`.
1055 _items
= new NativeString(s
.byte_length
)
1056 for i
in s
.substrings
do i
._items
.copy_to
(_items
, i
._byte_length
, first_byte
, 0)
1057 _byte_length
= s
.byte_length
1059 _capacity
= _byte_length
1062 # Create a new empty string with a given capacity.
1063 init with_capacity
(cap
: Int)
1066 _items
= new NativeString(cap
)
1073 if s
.is_empty
then return
1074 var sl
= s
.byte_length
1075 var nln
= _byte_length
+ sl
1077 if s
isa FlatText then
1078 s
._items
.copy_to
(_items
, sl
, s
.first_byte
, _byte_length
)
1080 for i
in s
.substrings
do append i
1087 # Copies the content of self in `dest`
1088 fun copy
(start
: Int, len
: Int, dest
: Buffer, new_start
: Int)
1090 var self_chars
= self.chars
1091 var dest_chars
= dest
.chars
1092 for i
in [0..len-1
] do
1093 dest_chars
[new_start
+i
] = self_chars
[start
+i
]
1097 redef fun substring
(from
, count
)
1100 if from
< 0 then from
= 0
1101 if (from
+ count
) > _length
then count
= _length
- from
1102 if count
<= 0 then return new Buffer
1104 var bytefrom
= its
.char_to_byte_index
(from
)
1105 var byteto
= its
.char_to_byte_index
(count
+ from
- 1)
1106 byteto
+= its
.char_at
(byteto
).u8char_len
- 1
1107 var byte_length
= byteto
- bytefrom
+ 1
1108 var r_items
= new NativeString(byte_length
)
1109 its
.copy_to
(r_items
, byte_length
, bytefrom
, 0)
1110 return new FlatBuffer.with_infos
(r_items
, byte_length
, byte_length
, count
)
1113 redef fun append_substring_impl
(s
, from
, length
) do
1114 if length
<= 0 then return
1115 if not s
isa FlatText then
1120 var bytest
= s
.char_to_byte_index
(from
)
1121 var bytend
= s
.char_to_byte_index
(from
+ length
- 1)
1122 var btln
= bytend
- bytest
+ sits
.char_at
(bytend
).u8char_len
1123 enlarge
(btln
+ _byte_length
)
1124 sits
.copy_to
(_items
, btln
, bytest
, _byte_length
)
1125 _byte_length
+= btln
1129 redef fun remove_at
(p
, len
) do
1130 if len
== null then len
= 1
1131 if len
== 0 then return
1133 var bst
= char_to_byte_index
(p
)
1134 var bend
= char_to_byte_index
(p
+ len
- 1)
1135 bend
+= its
.char_at
(bend
).u8char_len
1136 var blen
= bend
- bst
1137 lshift_bytes
(bend
, bend
- bst
)
1145 var ns
= new FlatBuffer.with_capacity
(capacity
)
1146 for i
in chars
.reverse_iterator
do ns
.add i
1150 redef fun times
(repeats
)
1152 var bln
= _byte_length
1153 var x
= new FlatString.full
(_items
, bln
, 0, _length
)
1154 for i
in [1 .. repeats
[ do
1161 if written
then reset
1162 for i
in [0 .. _length
[ do self[i
] = self[i
].to_upper
1167 if written
then reset
1168 for i
in [0 .. _length
[ do self[i
] = self[i
].to_lower
1172 private class FlatBufferByteReverseIterator
1173 super IndexedIterator[Byte]
1175 var target
: FlatBuffer
1177 var target_items
: NativeString is noautoinit
1181 init do target_items
= target
._items
1183 redef fun index
do return curr_pos
1185 redef fun is_ok
do return curr_pos
>= 0
1187 redef fun item
do return target_items
[curr_pos
]
1189 redef fun next
do curr_pos
-= 1
1193 private class FlatBufferByteView
1194 super BufferByteView
1196 redef type SELFTYPE: FlatBuffer
1198 redef fun [](index
) do return target
._items
[index
]
1200 redef fun iterator_from
(pos
) do return new FlatBufferByteIterator(target
, pos
)
1202 redef fun reverse_iterator_from
(pos
) do return new FlatBufferByteReverseIterator(target
, pos
)
1206 private class FlatBufferByteIterator
1207 super IndexedIterator[Byte]
1209 var target
: FlatBuffer
1211 var target_items
: NativeString is noautoinit
1215 init do target_items
= target
._items
1217 redef fun index
do return curr_pos
1219 redef fun is_ok
do return curr_pos
< target
._byte_length
1221 redef fun item
do return target_items
[curr_pos
]
1223 redef fun next
do curr_pos
+= 1
1227 private class FlatBufferCharReverseIterator
1228 super IndexedIterator[Char]
1230 var target
: FlatBuffer
1234 redef fun index
do return curr_pos
1236 redef fun is_ok
do return curr_pos
>= 0
1238 redef fun item
do return target
[curr_pos
]
1240 redef fun next
do curr_pos
-= 1
1244 private class FlatBufferCharView
1245 super BufferCharView
1247 redef type SELFTYPE: FlatBuffer
1249 redef fun [](index
) do return target
[index
]
1251 redef fun []=(index
, item
)
1253 assert index
>= 0 and index
<= length
1254 if index
== length
then
1258 target
[index
] = item
1271 fun enlarge
(cap
: Int)
1278 var s_length
= s
.length
1279 if target
.capacity
< s
.length
then enlarge
(s_length
+ target
._length
)
1280 for i
in s
do target
.add i
1283 redef fun iterator_from
(pos
) do return new FlatBufferCharIterator(target
, pos
)
1285 redef fun reverse_iterator_from
(pos
) do return new FlatBufferCharReverseIterator(target
, pos
)
1289 private class FlatBufferCharIterator
1290 super IndexedIterator[Char]
1292 var target
: FlatBuffer
1294 var max
: Int is noautoinit
1298 init do max
= target
._length
- 1
1300 redef fun index
do return curr_pos
1302 redef fun is_ok
do return curr_pos
<= max
1304 redef fun item
do return target
[curr_pos
]
1306 redef fun next
do curr_pos
+= 1
1310 redef class NativeString
1313 return to_s_with_length
(cstring_length
)
1316 redef fun to_s_with_length
(length
)
1319 return clean_utf8
(length
)
1322 redef fun to_s_full
(byte_length
, unilen
) do
1323 return new FlatString.full
(self, byte_length
, 0, unilen
)
1326 redef fun to_s_unsafe
(len
) do
1327 if len
== null then len
= cstring_length
1328 return new FlatString.with_infos
(self, len
, 0)
1331 redef fun to_s_with_copy
do return to_s_with_copy_and_length
(cstring_length
)
1333 # Get a `String` from `length` bytes at `self` copied into Nit memory
1334 fun to_s_with_copy_and_length
(length
: Int): String
1336 var r
= clean_utf8
(length
)
1337 if r
.items
!= self then return r
1338 var new_self
= new NativeString(length
+ 1)
1339 copy_to
(new_self
, length
, 0, 0)
1340 var str
= new FlatString.with_infos
(new_self
, length
, 0)
1341 new_self
[length
] = 0u8
1345 # Cleans a NativeString if necessary
1346 fun clean_utf8
(len
: Int): FlatString do
1347 var replacements
: nullable Array[Int] = null
1348 var end_length
= len
1354 var i
= fetch_4_chars
(pos
)
1355 if i
& 0x80808080 != 0 then break
1360 if rem
== 0 then break
1362 if b
& 0x80u
8 == 0x00u
8 then
1368 var nxst
= length_of_char_at
(pos
)
1371 ok_st
= b
& 0x80u
8 == 0u8
1372 else if nxst
== 2 then
1373 ok_st
= b
& 0xE0u
8 == 0xC0u
8
1374 else if nxst
== 3 then
1375 ok_st
= b
& 0xF0u
8 == 0xE0u
8
1377 ok_st
= b
& 0xF8u
8 == 0xF0u
8
1380 if replacements
== null then replacements
= new Array[Int]
1381 replacements
.add pos
1389 var c
= char_at
(pos
)
1390 var cp
= c
.code_point
1392 ok_c
= cp
>= 0 and cp
<= 0x7F
1393 else if nxst
== 2 then
1394 ok_c
= cp
>= 0x80 and cp
<= 0x7FF
1395 else if nxst
== 3 then
1396 ok_c
= cp
>= 0x800 and cp
<= 0xFFFF
1397 ok_c
= ok_c
and not (cp
>= 0xD800 and cp
<= 0xDFFF) and cp
!= 0xFFFE and cp
!= 0xFFFF
1399 ok_c
= cp
>= 0x10000 and cp
<= 0x10FFFF
1402 if replacements
== null then replacements
= new Array[Int]
1403 replacements
.add pos
1410 var clen
= c
.u8char_len
1416 if end_length
!= len
then
1417 ret
= new NativeString(end_length
)
1420 var repls
= replacements
.as(not null)
1421 var r
= repls
.items
.as(not null)
1422 var imax
= repls
.length
1423 for i
in [0 .. imax
[ do
1425 var chkln
= repl_pos
- old_repl
1426 copy_to
(ret
, chkln
, old_repl
, off
)
1429 ret
[off
+ 1] = 0xBFu
8
1430 ret
[off
+ 2] = 0xBDu
8
1431 old_repl
= repl_pos
+ 1
1434 copy_to
(ret
, len
- old_repl
, old_repl
, off
)
1436 return new FlatString.full
(ret
, end_length
, 0, chr_ln
)
1439 # Sets the next bytes at position `pos` to the value of `c`, encoded in UTF-8
1441 # Very unsafe, make sure to have room for this char prior to calling this function.
1442 private fun set_char_at
(pos
: Int, c
: Char) do
1443 var cp
= c
.code_point
1448 var ln
= c
.u8char_len
1450 self[pos
] = (0xC0 | ((cp
& 0x7C0) >> 6)).to_b
1451 self[pos
+ 1] = (0x80 | (cp
& 0x3F)).to_b
1452 else if ln
== 3 then
1453 self[pos
] = (0xE0 | ((cp
& 0xF000) >> 12)).to_b
1454 self[pos
+ 1] = (0x80 | ((cp
& 0xFC0) >> 6)).to_b
1455 self[pos
+ 2] = (0x80 | (cp
& 0x3F)).to_b
1456 else if ln
== 4 then
1457 self[pos
] = (0xF0 | ((cp
& 0x1C0000) >> 18)).to_b
1458 self[pos
+ 1] = (0x80 | ((cp
& 0x3F000) >> 12)).to_b
1459 self[pos
+ 2] = (0x80 | ((cp
& 0xFC0) >> 6)).to_b
1460 self[pos
+ 3] = (0x80 | (cp
& 0x3F)).to_b
1466 # Returns a displayable, signed, base-10 representation of `self`
1468 # assert 1.to_s == "1"
1469 # assert (-123).to_s == "-123"
1471 # Fast case for common numbers
1472 if self == 0 then return "0"
1473 if self == 1 then return "1"
1475 var nslen
= int_to_s_len
1476 var ns
= new NativeString(nslen
+ 1)
1478 native_int_to_s
(ns
, nslen
+ 1)
1479 return new FlatString.full
(ns
, nslen
, 0, nslen
)
1483 redef class Array[E
]
1485 # Fast implementation
1486 redef fun plain_to_s
1489 if l
== 0 then return ""
1490 var its
= _items
.as(not null)
1492 if l
== 1 then if first
== null then return "" else return first
.to_s
1493 var na
= new NativeArray[String](l
)
1499 if itsi
== null then
1504 sl
+= tmp
.byte_length
1509 var ns
= new NativeString(sl
+ 1)
1515 if tmp
isa FlatString then
1516 var tpl
= tmp
._byte_length
1517 tmp
._items
.copy_to
(ns
, tpl
, tmp
._first_byte
, off
)
1520 for j
in tmp
.substrings
do
1521 var s
= j
.as(FlatString)
1522 var slen
= s
._byte_length
1523 s
._items
.copy_to
(ns
, slen
, s
._first_byte
, off
)
1529 return new FlatString.with_infos
(ns
, sl
, 0)
1533 redef class NativeArray[E
]
1534 redef fun native_to_s
do
1535 assert self isa NativeArray[String]
1542 sl
+= na
[i
].byte_length
1546 var ns
= new NativeString(sl
+ 1)
1552 if tmp
isa FlatString then
1553 var tpl
= tmp
._byte_length
1554 tmp
._items
.copy_to
(ns
, tpl
, tmp
._first_byte
, off
)
1557 for j
in tmp
.substrings
do
1558 var s
= j
.as(FlatString)
1559 var slen
= s
._byte_length
1560 s
._items
.copy_to
(ns
, slen
, s
._first_byte
, off
)
1566 return new FlatString.with_infos
(ns
, sl
, 0)
1570 redef class Map[K
,V
]
1571 redef fun join
(sep
, couple_sep
)
1573 if is_empty
then return ""
1575 var s
= new Buffer # Result
1581 s
.append
("{k or else "<null>"}{couple_sep}{e or else "<null>"}")
1583 # Concat other _items
1589 s
.append
("{k or else "<null>"}{couple_sep}{e or else "<null>"}")